qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

mpy.idef (54669B)


      1 /*
      2  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
      3  *
      4  *  This program is free software; you can redistribute it and/or modify
      5  *  it under the terms of the GNU General Public License as published by
      6  *  the Free Software Foundation; either version 2 of the License, or
      7  *  (at your option) any later version.
      8  *
      9  *  This program is distributed in the hope that it will be useful,
     10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  *  GNU General Public License for more details.
     13  *
     14  *  You should have received a copy of the GNU General Public License
     15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
     16  */
     17 
     18 /*
     19  * Multiply Instructions
     20  */
     21 
     22 
        /*
         * STD_SP_MODES expands to eight Q6INSN definitions named
         * M2_<TAG>_{hh,hl,lh,ll}_{s0,s1}.  The hh/hl/lh/ll part selects which
         * halfword of Rs and Rt is multiplied (fGETHALF index 1 pairs with the
         * ".H32" syntax, index 0 with ".L32").  _s1 is the ":<<1" form, which
         * wraps the product in fSCALE(1,...).
         *
         *   TAG     name fragment pasted between "M2_" and the suffix
         *   OPER    leading part of the syntax string, e.g. "Rx32+=mpy"
         *   ATR     attribute argument handed to Q6INSN (may be empty)
         *   DST     lvalue that receives the result
         *   ACCSEM  text emitted in front of the product, e.g. RxV+ (may be empty)
         *   SEM     multiply helper applied to the two halfwords
         *   OSEM    trailing part of the syntax string, e.g. ":sat" (may be empty)
         *   SATSEM  outermost wrapper around the result
         *   RNDSEM  wrapper applied just inside SATSEM
         */
     23 #define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
     24 Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(1,RtV))));})\
     25 Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\
     26 Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(0,RtV))));})\
     27 Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\
     28 Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(1,RtV))));})\
     29 Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\
     30 Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(0,RtV))));})\
     31 Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));})
     32 
     33 /*****************************************************/
     34 /* multiply 16x16->32 signed instructions            */
     35 /*****************************************************/
        /*
         * Each line below instantiates the eight halfword variants produced by
         * STD_SP_MODES, all using fMPY16SS as the multiply helper.
         *   _acc / _nac   accumulate into (RxV+) or subtract from (RxV-) the
         *                 destination
         *   ":sat" forms  pass fSAT as SATSEM, the others pass fPASS
         *   ":rnd" forms  pass fROUND as RNDSEM, the others pass fPASS
         *   mpyd* forms   use the Rdd32 / Rxx32 destination syntax
         */
     36 STD_SP_MODES(mpy_acc,    "Rx32+=mpy", ,RxV,RxV+    ,fMPY16SS,          ,fPASS,fPASS)
     37 STD_SP_MODES(mpy_nac,    "Rx32-=mpy", ,RxV,RxV-    ,fMPY16SS,          ,fPASS,fPASS)
     38 STD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+    ,fMPY16SS,":sat"    ,fSAT, fPASS)
     39 STD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV-    ,fMPY16SS,":sat"    ,fSAT, fPASS)
     40 STD_SP_MODES(mpy,        "Rd32=mpy",  ,RdV,        ,fMPY16SS,          ,fPASS,fPASS)
     41 STD_SP_MODES(mpy_sat,    "Rd32=mpy",  ,RdV,        ,fMPY16SS,":sat"    ,fSAT, fPASS)
     42 STD_SP_MODES(mpy_rnd,    "Rd32=mpy",  ,RdV,        ,fMPY16SS,":rnd"    ,fPASS,fROUND)
     43 STD_SP_MODES(mpy_sat_rnd,"Rd32=mpy",  ,RdV,        ,fMPY16SS,":rnd:sat",fSAT, fROUND)
     44 STD_SP_MODES(mpyd_acc,   "Rxx32+=mpy",,RxxV,RxxV+  ,fMPY16SS,          ,fPASS,fPASS)
     45 STD_SP_MODES(mpyd_nac,   "Rxx32-=mpy",,RxxV,RxxV-  ,fMPY16SS,          ,fPASS,fPASS)
     46 STD_SP_MODES(mpyd,       "Rdd32=mpy", ,RddV,       ,fMPY16SS,          ,fPASS,fPASS)
     47 STD_SP_MODES(mpyd_rnd,   "Rdd32=mpy", ,RddV,       ,fMPY16SS,":rnd"    ,fPASS,fROUND)
     48 
     49 
     50 /*****************************************************/
     51 /* multiply 16x16->32 unsigned instructions          */
     52 /*****************************************************/
        /*
         * STD_USP_MODES takes the same nine arguments as STD_SP_MODES and
         * generates the same eight M2_<TAG>_{hh,hl,lh,ll}_{s0,s1} definitions.
         * The only difference is that both halfword operands are fetched with
         * fGETUHALF instead of fGETHALF.
         */
     53 #define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
     54 Q6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\
     55 Q6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\
     56 Q6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\
     57 Q6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\
     58 Q6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\
     59 Q6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\
     60 Q6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\
     61 Q6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));})
     62 
        /*
         * Unsigned instantiations.  All use fMPY16UU with no saturation or
         * rounding (fPASS for both wrappers).  Only the two non-accumulating
         * forms pass an explicit ATTRIBS() as ATR, the rest leave it empty.
         */
     63 STD_USP_MODES(mpyu_acc,    "Rx32+=mpyu", ,RxV,RxV+  ,fMPY16UU,          ,fPASS,fPASS)
     64 STD_USP_MODES(mpyu_nac,    "Rx32-=mpyu", ,RxV,RxV-  ,fMPY16UU,          ,fPASS,fPASS)
     65 STD_USP_MODES(mpyu,        "Rd32=mpyu",  ATTRIBS() ,RdV,  ,fMPY16UU, ,fPASS,fPASS)
     66 STD_USP_MODES(mpyud_acc,   "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU,          ,fPASS,fPASS)
     67 STD_USP_MODES(mpyud_nac,   "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU,          ,fPASS,fPASS)
     68 STD_USP_MODES(mpyud,       "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS)
     69 
     70 /**********************************************/
     71 /* mpy 16x#s8->32                             */
     72 /**********************************************/
        /*
         * NOTE(review): the banner above mentions a 16-bit operand and a signed
         * s8 immediate, but the four definitions below take Rs32 and an
         * unsigned u8 immediate (uiV) according to their syntax strings.
         *
         * mpysip / mpysin write Rs*uiV and Rs*-uiV to Rd.  macsip / macsin add
         * or subtract Rs*uiV to/from Rx.
         */
     73 
     74 Q6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
     75 "32-bit Multiply by unsigned immediate",
     76 { fIMMEXT(uiV); RdV=RsV*uiV; })
     77 
        /*
         * Unlike the other three definitions in this group, the negating form
         * does not invoke fIMMEXT(uiV).
         * NOTE(review): presumably intentional, confirm against the ISA manual.
         */
     78 Q6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
     79 "32-bit Multiply by unsigned immediate, negate result",
     80 { RdV=RsV*-uiV; })
     81 
     82 Q6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
     83 "32-bit Multiply-Add by unsigned immediate",
     84 { fIMMEXT(uiV); RxV=RxV + (RsV*uiV);})
     85 
     86 Q6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
     87 "32-bit Multiply-Subtract by unsigned immediate",
     88 { fIMMEXT(uiV); RxV=RxV - (RsV*uiV);})
     89 
     90 
     91 /**********************************************/
     92 /* multiply/mac  32x32->64 instructions       */
     93 /**********************************************/
        /*
         * Signed forms feed RsV and RtV straight into fMPY32SS.  The plain form
         * writes the product to Rdd, _acc adds it to Rxx and _nac subtracts it
         * from Rxx.
         */
     94 Q6INSN(M2_dpmpyss_s0,    "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);})
     95 Q6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);})
     96 Q6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);})
     97 
        /*
         * Unsigned forms pass both operands through fCAST4u before fMPY32UU,
         * with the same plain / _acc / _nac split as above.
         */
     98 Q6INSN(M2_dpmpyuu_s0,    "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
     99 Q6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
    100 Q6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
    101 
    102 
    103 /******************************************************/
    104 /* multiply/mac  32x32->32 (upper) instructions       */
    105 /******************************************************/
        /*
         * The 32x32 product is shifted right by 32 so that Rd receives its upper
         * word.  The ":<<1" forms shift right by 31 instead, and the ":sat" form
         * additionally wraps that value in fSAT.
         *   mpyu   casts both operands with fCAST4u and uses fMPY32UU
         *   mpysu  casts only Rt with fCAST4u and uses fMPY32SU
         *   :rnd   adds fCONSTLL(0x80000000) to the product before the shift
         */
    106 Q6INSN(M2_mpy_up,        "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;})
    107 Q6INSN(M2_mpy_up_s1,     "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;})
    108 Q6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);})
    109 Q6INSN(M2_mpyu_up,       "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;})
    110 Q6INSN(M2_mpysu_up,      "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;})
    111 Q6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;})
    112 
        /*
         * Accumulating forms: Rx is widened with fSE32_64, the product shifted
         * right by 31 is added or subtracted, and the result goes through fSAT.
         */
    113 Q6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));})
    114 Q6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));})
    115 
    116 
    117 /**********************************************/
    118 /* 32x32->32 multiply (lower)                 */
    119 /**********************************************/
        /*
         * Plain register-by-register products written with the C multiply
         * operator: mpyi stores Rs*Rt in Rd, maci adds it to Rx and mnaci
         * subtracts it from Rx.
         */
    120 
    121 Q6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(),
    122 "Multiply Integer",
    123 { RdV=RsV*RtV;})
    124 
    125 Q6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
    126 "Multiply-Accumulate Integer",
    127 { RxV=RxV + RsV*RtV;})
    128 
    129 Q6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
    130 "Multiply-Neg-Accumulate Integer",
    131 { RxV=RxV - RsV*RtV;})
    132 
    133 /****** WHY ARE THESE IN MPY.IDEF? **********/
        /*
         * Add/subtract-with-accumulate definitions (no multiply involved):
         *   acci / accii     Rx += Rs + Rt   and   Rx += Rs + siV
         *   nacci / naccii   Rx -= (Rs + Rt) and   Rx -= (Rs + siV)
         *   subacc           Rx += Rt - Rs   (note the Rt,Rs operand order)
         * The two immediate forms invoke fIMMEXT(siV) first.
         */
    134 
    135 Q6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
    136 "Add with accumulate",
    137 { RxV=RxV + RsV + RtV;})
    138 
    139 Q6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
    140 "Add with accumulate",
    141 { fIMMEXT(siV); RxV=RxV + RsV + siV;})
    142 
    143 Q6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
    144 "Add with neg accumulate",
    145 { RxV=RxV - (RsV + RtV);})
    146 
    147 Q6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
    148 "Add with neg accumulate",
    149 { fIMMEXT(siV); RxV=RxV - (RsV + siV);})
    150 
    151 Q6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
    152 "Sub with accumulate",
    153 { RxV=RxV + RtV - RsV;})
    154 
    155 
    156 
    157 
    158 Q6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(),
    159 "Mpy by register and add register",
    160 { RyV = RuV + RsV*RyV;})
    161 
        /*
         * Immediate multiplier (uiV) times Rs, plus register Ru.  This form
         * does not invoke fIMMEXT, the next one does.
         */
    162 Q6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(),
    163 "Mpy by immed and add register",
    164 { RdV = RuV + RsV*uiV;})
    165 
    166 Q6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(),
    167 "Mpy by immed and add register",
    168 { fIMMEXT(uiV); RdV = RuV + RsV*uiV;})
    169 
    170 
    171 
        /*
         * Two immediates: uiV is the addend (and the one passed to fIMMEXT),
         * UiV is the multiplier.
         */
    172 Q6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(),
    173 "Mpy by immed and add immed",
    174 { fIMMEXT(uiV); RdV = uiV + RsV*UiV;})
    175 
    176 
    177 
        /* Register-by-register product plus the immediate addend uiV. */
    178 Q6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(),
    179 "Mpy by register and add immed",
    180 { fIMMEXT(uiV); RdV = uiV + RsV*RtV;})
    181 
    182 
    183 
    184 
    185 
    186 
    187 
    188 
    189 
    190 
    191 
    192 
    193 
    194 
    195 
    196 
    197 
    198 /**********************************************/
    199 /* vector mac  2x[16x16 -> 32]                */
    200 /**********************************************/
    201 
    202 #undef vmac_sema
        /*
         * vmac_sema is redefined before each instruction family in this section.
         * This variant multiplies halfword 0 of Rs by halfword 0 of Rt and
         * halfword 1 by halfword 1 (fMPY16SS), applies fSCALE(N,...) and fSAT,
         * and stores the two results in words 0 and 1 of Rdd.
         * N is 0 for the _s0 form and 1 for the ":<<1" (_s1) form.
         */
    203 #define vmac_sema(N)\
    204 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
    205   fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
    206 }
    207 Q6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    208 Q6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    209 
    210 
    211 #undef vmac_sema
        /*
         * Accumulating variant: each scaled signed halfword product is added to
         * the current word of Rxx (fGETWORD) and the sum is saturated with fSAT
         * before being written back to the same word.
         */
    212 #define vmac_sema(N)\
    213 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
    214   fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
    215 }
    216 Q6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    217 Q6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    218 
    219 #undef vmac_sema
        /*
         * Mixed-sign variant (vmpyhsu): the Rs halfword is read with fGETHALF,
         * the Rt halfword with fGETUHALF, and the product uses fMPY16SU.
         * Results are scaled, saturated and stored in words 0 and 1 of Rdd.
         */
    220 #define vmac_sema(N)\
    221 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
    222   fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
    223 }
    224 Q6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    225 Q6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    226 
    227 
    228 #undef vmac_sema
        /*
         * Accumulating mixed-sign variant: same fMPY16SU products as the
         * non-accumulating vmpyhsu form, added to the existing words of Rxx and
         * saturated with fSAT.
         */
    229 #define vmac_sema(N)\
    230 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
    231   fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
    232 }
    233 Q6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    234 Q6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    235 
    236 
    237 
    238 #undef vmac_sema
        /*
         * Round-and-pack variant producing a single 32-bit Rd.  For each lane
         * the scaled signed product has 0x8000 added, the sum is passed to
         * fSAT, and halfword 1 of that value is stored into the matching
         * halfword of Rd.  Lane 1 is written first, then lane 0.
         */
    239 #define vmac_sema(N)\
    240 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
    241   fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\
    242 }
    243 Q6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
    244 Q6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1))
    245 
    246 
    247 #undef vmac_sema
        /*
         * Non-saturating accumulate: the signed halfword products are added to
         * words 0 and 1 of Rxx with no fSCALE and no fSAT.  The macro parameter
         * N is accepted but not used in this variant.
         */
    248 #define vmac_sema(N)\
    249 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\
    250   fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\
    251 }
    252 Q6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
    253 
    254 #undef vmac_sema
        /*
         * Even-halfword variant (vmpyeh) on 64-bit sources: halfwords 0 and 2 of
         * Rss are multiplied by halfwords 0 and 2 of Rtt.  The scaled, saturated
         * products go to words 0 and 1 of Rdd respectively.
         */
    255 #define vmac_sema(N)\
    256 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
    257   fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
    258 }
    259 Q6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    260 Q6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    261 
    262 #undef vmac_sema
        /*
         * Accumulating even-halfword variant: the scaled products of halfwords
         * 0 and 2 are added to words 0 and 1 of Rxx and saturated with fSAT.
         */
    263 #define vmac_sema(N)\
    264 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
    265   fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
    266 }
    267 Q6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    268 Q6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
    269 
    270 #undef vmac_sema
        /*
         * Non-saturating even-halfword accumulate: products of halfwords 0 and 2
         * are added to words 0 and 1 of Rxx without fSCALE or fSAT.  N is unused.
         */
    271 #define vmac_sema(N)\
    272 { fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\
    273   fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\
    274 }
    275 Q6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
    276 
    277 
    278 
    279 
    280 /********************************************************/
    281 /* vrmpyh, aka Big Mac, aka Mac Daddy, aka Mac-ac-ac-ac */
    282 /* vector mac  4x[16x16] + 64 ->64                      */
    283 /********************************************************/
    284 
    285 
    286 #undef vmac_sema
        /*
         * Reducing accumulate: the four signed products of matching halfwords
         * 0..3 of Rss and Rtt are summed together with the current value of Rxx
         * and the total is assigned back to Rxx.  No scaling or saturation is
         * applied and the macro parameter N is unused.
         */
    287 #define vmac_sema(N)\
    288 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
    289               + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
    290               + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
    291               + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    292 }
    293 Q6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    294 
    295 #undef vmac_sema
        /*
         * Non-accumulating reduction: Rdd is assigned the sum of the four signed
         * products of matching halfwords 0..3 of Rss and Rtt.  N is unused.
         */
    296 #define vmac_sema(N)\
    297 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
    298        + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
    299        + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
    300        + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    301 }
    302 Q6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
    303 
    304 
    305 
    306 /******************************************************/
    307 /* vector dual macs. just like complex                */
    308 /******************************************************/
    309 
    310 
    311 /* With round&pack */
    312 #undef dmpy_sema
        /*
         * Each output halfword is built from two adjacent halfword products:
         * halfword 0 of Rd from lanes 0 and 1, halfword 1 of Rd from lanes 2 and
         * 3.  The two scaled products and the constant 0x8000 are summed, the
         * sum goes through fSAT, and halfword 1 of that value is kept.
         * N is 0 for _s0 and 1 for the ":<<1" (_s1) form.
         */
    313 #define dmpy_sema(N)\
    314 { fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
    315                                   fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\
    316   fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
    317                                   fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\
    318 }
    319 Q6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "vector dual mac w/ round&pack",dmpy_sema(0))
    320 Q6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1))
    321 
    322 
    323 
    324 
    325 
    326 /******************************************************/
    327 /* vector byte multiplies                             */
    328 /******************************************************/
        /*
         * The byte forms in this section reuse fMPY16SS as the multiply helper.
         * Signedness is decided by how each byte is fetched (fGETUBYTE or
         * fGETBYTE), not by the helper.
         *
         * vrmpybu: bytes 0..3 of Rss and Rtt are multiplied pairwise and summed
         * into word 0 of Rdd, bytes 4..7 into word 1.  Both operands are read
         * with fGETUBYTE.
         */
    329 
    330 
    331 Q6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
    332  "vector dual mpy bytes",
    333 {
    334   fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    335                    fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
    336                    fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    337                    fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
    338   fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    339                    fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
    340                    fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    341                    fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
    342  })
    343 
        /*
         * Accumulating form of vrmpybu: the same four-product sums are added to
         * the existing words 0 and 1 of Rxx.
         */
    344 Q6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
    345  "vector dual mac bytes",
    346 {
    347   fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
    348                    fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    349                    fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
    350                    fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    351                    fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
    352   fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
    353                    fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    354                    fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
    355                    fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    356                    fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
    357  })
    358 
    359 
        /*
         * vrmpybsu: same reduction shape as vrmpybu (bytes 0..3 into word 0,
         * bytes 4..7 into word 1), but the Rss bytes are read with fGETBYTE
         * while the Rtt bytes are still read with fGETUBYTE.
         */
    360 Q6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
    361  "vector dual mpy bytes",
    362 {
    363   fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    364                    fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
    365                    fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    366                    fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
    367   fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    368                    fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
    369                    fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    370                    fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
    371  })
    372 
        /* Accumulating form of vrmpybsu: sums are added to words 0 and 1 of Rxx. */
    373 Q6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
    374  "vector dual mac bytes",
    375 {
    376   fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
    377                    fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    378                    fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
    379                    fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    380                    fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
    381   fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
    382                    fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    383                    fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
    384                    fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    385                    fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
    386  })
    387 
    388 
        /*
         * vmpybu: element-wise byte multiply of 32-bit sources.  Byte i of Rs
         * times byte i of Rt (both via fGETUBYTE) is stored in halfword i of
         * Rdd, for i = 0..3.
         */
    389 Q6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(),
    390  "vector mpy bytes",
    391 {
    392   fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
    393   fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
    394   fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
    395   fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
    396  })
    397 
        /* vmpybsu: as vmpybu, but the Rs byte is read with fGETBYTE. */
    398 Q6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(),
    399  "vector mpy bytes",
    400 {
    401   fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
    402   fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
    403   fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
    404   fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
    405  })
    406 
    407 
        /*
         * Accumulating element-wise byte multiplies: each byte product is added
         * to the existing halfword i of Rxx (fGETHALF) and written back with
         * fSETHALF.  No saturation helper is applied in these two forms.
         */
    408 Q6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(),
    409  "vector mac bytes",
    410 {
    411   fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
    412   fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
    413   fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
    414   fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
    415  })
    416 
        /* Same as the form above, with the Rs byte read via fGETBYTE. */
    417 Q6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(),
    418  "vector mac bytes",
    419 {
    420   fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
    421   fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
    422   fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
    423   fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
    424  })
    425 
    426 
    427 
        /*
         * vdmpybsu: adjacent byte pairs are multiplied (Rss byte via fGETBYTE,
         * Rtt byte via fGETUBYTE) and the two products of each pair are summed.
         * Pair (2i, 2i+1) feeds halfword i of Rdd after fSATN(16,...).
         */
    428 Q6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
    429  "vector quad mpy bytes",
    430 {
    431   fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    432                             fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
    433   fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    434                             fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
    435   fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    436                             fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
    437   fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    438                             fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
    439  })
    440 
    441 
        /*
         * Accumulating form: the existing halfword i of Rxx is added to the
         * pair sum before fSATN(16,...) is applied and the halfword is stored.
         */
    442 Q6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
    443  "vector quad mac bytes",
    444 {
    445   fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) +
    446                    fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
    447                    fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
    448   fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) +
    449                    fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
    450                    fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
    451   fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) +
    452                    fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
    453                    fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
    454   fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) +
    455                    fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
    456                    fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
    457  })
    458 
    459 
    460 
    461 /* Full version */
    462 #undef dmpy_sema
        /*
         * Accumulating dual multiply with full 32-bit results.  Word 0 of Rxx
         * receives its old value plus the scaled signed products of halfword
         * lanes 0 and 1; word 1 receives its old value plus lanes 2 and 3.
         * Each sum is passed through fSAT.  N is 0 for _s0, 1 for ":<<1".
         */
    463 #define dmpy_sema(N)\
    464 { fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
    465                      fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
    466   fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
    467                      fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
    468 }
    469 Q6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(),    "",dmpy_sema(0))
    470 Q6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
    471 
    472 #undef dmpy_sema
        /*
         * Non-accumulating dual multiply: word 0 of Rdd is the saturated sum of
         * the scaled products of lanes 0 and 1, word 1 that of lanes 2 and 3.
         */
    473 #define dmpy_sema(N)\
    474 { fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
    475               fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
    476   fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
    477               fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
    478 }
    479 
    480 Q6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(),    "",dmpy_sema(0))
    481 Q6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
    482 
    483 
    484 
    485 /******************************************************/
    486 /* complex multiply/mac with                          */
    487 /* real&imag are packed together and always saturated */
    488 /* to protect against overflow.                       */
    489 /******************************************************/
    490 
    491 #undef cmpy_sema
        /*
         * cmpy_sema(N,CONJMINUS,CONJPLUS): the last two arguments are operator
         * tokens spliced between the two partial products.
         *   halfword 1 of Rd:  Rs.h1*Rt.h0  CONJMINUS  Rs.h0*Rt.h1
         *   halfword 0 of Rd:  Rs.h0*Rt.h0  CONJPLUS   Rs.h1*Rt.h1
         * The plain cmpy forms pass (+,-) and the conjugate forms (written
         * "Rt32*" in the syntax) pass (-,+), so each parameter name describes
         * the sign used in the conjugate case.  With the (+,-) pattern this is
         * the usual complex product with the real part in halfword 0 and the
         * imaginary part in halfword 1 (presumably the packing the banner
         * refers to).
         * In this round-and-pack variant 0x8000 is added to the sum, the
         * result goes through fSAT, and halfword 1 of that value is stored.
         */
    492 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
    493 { fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
    494                                   fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\
    495   fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
    496                                   fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
    497 }
    498 Q6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
    499 Q6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
    500 
    501 
    502 Q6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
    503 Q6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
    504 
    505 
    506 #undef cmpy_sema
        /*
         * Accumulating complex multiply with 32-bit lanes.  Word 1 of Rxx gets
         * its old value plus (Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1), word 0 its old
         * value plus (Rs.h0*Rt.h0 CONJPLUS Rs.h1*Rt.h1).  Each product is scaled
         * by N and each sum is saturated with fSAT.  Plain forms pass (+,-),
         * conjugate ("Rt32*") forms pass (-,+).
         */
    507 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
    508 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
    509                                           fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
    510   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
    511                                           fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
    512 }
    513 Q6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
    514 Q6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
    515 
    516 /* EJP: Need mac versions w/ CONJ T? */
    517 Q6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
    518 Q6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
    519 
    520 
    521 #undef cmpy_sema
        /*
         * Non-accumulating complex multiply with 32-bit lanes: word 1 of Rdd is
         * fSAT(Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1) and word 0 is
         * fSAT(Rs.h0*Rt.h0 CONJPLUS Rs.h1*Rt.h1), every product scaled by N.
         * Plain forms pass (+,-), conjugate ("Rt32*") forms pass (-,+).
         */
    522 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
    523 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
    524                        fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
    525   fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
    526                        fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
    527 }
    528 
    529 Q6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
    530 Q6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
    531 
    532 Q6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
    533 Q6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
    534 
    535 
    536 
    537 #undef cmpy_sema
        /*
         * Negative-accumulate complex multiply: the parenthesised complex
         * product term is subtracted from the existing word of Rxx, and the
         * difference is saturated with fSAT.  The parentheses keep the
         * CONJMINUS / CONJPLUS operator inside the subtracted term.
         */
    538 #define cmpy_sema(N,CONJMINUS,CONJPLUS)\
    539 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
    540                                            fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\
    541   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
    542                                            fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\
    543 }
    544 Q6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,+,-))
    545 Q6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-))
    546 
    547 /* EJP: need CONJ versions? */
    548 Q6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
    549 Q6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
    550 
    551 
    552 /******************************************************/
    553 /* complex interpolation                              */
    554 /* Given a pair of complex values, scale by a,b, sum  */
    555 /* Saturate/shift1 and round/pack                     */
    556 /******************************************************/
    557 
    558 #undef vrcmpys_sema
        /*
         * vrcmpys_sema(N,INWORD): INWORD is an expression yielding one word of
         * Rtt (the users below pass fGETWORD(1,RttV) for ":hi" and
         * fGETWORD(0,RttV) for ":lo").  Its two halfwords act as the scalars.
         *   word 1 of Rdd = fSAT(Rss.h1*INWORD.h0 + Rss.h3*INWORD.h1)
         *   word 0 of Rdd = fSAT(Rss.h0*INWORD.h0 + Rss.h2*INWORD.h1)
         * Every product is scaled with fSCALE(N,...).  Both users pass N = 1.
         */
    559 #define vrcmpys_sema(N,INWORD) \
    560 { fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
    561                        fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
    562   fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
    563                        fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
    564 }
    565 
    566 
    567 
    568 Q6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
    569 Q6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
    570 
    571 #undef vrcmpys_sema
        /*
         * Accumulating variant: the same two scaled products per word are added
         * to the existing word of Rxx and the sum is saturated with fSAT.
         * INWORD is again the ":hi" (word 1) or ":lo" (word 0) half of Rtt.
         */
    572 #define vrcmpys_sema(N,INWORD) \
    573 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
    574                        fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
    575   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
    576                        fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
    577 }
    578 
    579 
    580 
    581 Q6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
    582 Q6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
    583 
    /*
     * vrcmpys_sema(N,INWORD) -- round-and-pack form that writes 32-bit RdV.
     * For each result halfword the two scaled fMPY16SS products are summed,
     * 0x8000 is added, the total goes through fSAT, and only halfword 1 of
     * that value is stored:
     * Rd.h1 <- Rss.h1 * INWORD.h0 + Rss.h3 * INWORD.h1
     * Rd.h0 <- Rss.h0 * INWORD.h0 + Rss.h2 * INWORD.h1
     * INWORD is word 1 of RttV for :hi and word 0 for :lo; N is 1 in both.
     */
    584 #undef vrcmpys_sema
    585 #define vrcmpys_sema(N,INWORD) \
    586 { fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
    587                        fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
    588   fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
    589                        fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
    590 }
    591 
    592 Q6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
    593 Q6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
    594 
    595 /**************************************************************/
    596 /* mixed mode 32x16 vector dual multiplies                    */
    597 /*                                                            */
    598 /**************************************************************/
    599 
    600 /* SIGNED 32 x SIGNED 16 */
    601 
    602 
    /*
     * Accumulating vmpyweh (even halfwords), no rounding.
     * Per word lane: fMPY3216SS(Rss word, Rtt halfword) is scaled by
     * fSCALE(N,...), shifted right by 16, added to the same lane of Rxx and
     * the sum is passed through fSAT.  Lane 1 uses Rtt.h2, lane 0 uses Rtt.h0.
     * N is 0 for the plain form and 1 for the :<<1 form.
     */
    603 #undef mixmpy_sema
    604 #define mixmpy_sema(N)\
    605 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \
    606   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \
    607 }
    608 Q6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    609 Q6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    610 
    /*
     * Accumulating vmpywoh (odd halfwords), no rounding.
     * Same shape as the vmpyweh accumulate form, but lane 1 multiplies by
     * Rtt.h3 and lane 0 by Rtt.h1.  N is 0 (plain) or 1 (:<<1).
     */
    611 #undef mixmpy_sema
    612 #define mixmpy_sema(N)\
    613 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \
    614   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \
    615 }
    616 Q6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    617 Q6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    618 
    /*
     * Non-accumulating vmpyweh, no rounding: each word of Rdd is
     * fSAT((fSCALE(N, Rss word * Rtt halfword)) >> 16), with Rtt.h2 for
     * lane 1 and Rtt.h0 for lane 0.  N is 0 (plain) or 1 (:<<1).
     */
    619 #undef mixmpy_sema
    620 #define mixmpy_sema(N)\
    621 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \
    622   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \
    623 }
    624 Q6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    625 Q6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    626 
    /*
     * Non-accumulating vmpywoh, no rounding: as the vmpyweh form, but
     * lane 1 multiplies by Rtt.h3 and lane 0 by Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    627 #undef mixmpy_sema
    628 #define mixmpy_sema(N)\
    629 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \
    630   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \
    631 }
    632 Q6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    633 Q6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    634 
    635 
    636 /* With rounding */
    637 
    /*
     * Accumulating vmpyweh with rounding: identical to the non-rounding
     * accumulate form except that 0x8000 is added to the scaled product
     * before the >>16.  Lane 1 uses Rtt.h2, lane 0 uses Rtt.h0.
     * N is 0 (plain) or 1 (:<<1).
     */
    638 #undef mixmpy_sema
    639 #define mixmpy_sema(N)\
    640 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \
    641   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \
    642 }
    643 Q6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    644 Q6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    645 
    /*
     * Accumulating vmpywoh with rounding: 0x8000 is added to the scaled
     * product before the >>16; lane 1 uses Rtt.h3, lane 0 uses Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    646 #undef mixmpy_sema
    647 #define mixmpy_sema(N)\
    648 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \
    649   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \
    650 }
    651 Q6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    652 Q6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    653 
    /*
     * Non-accumulating vmpyweh with rounding: each word of Rdd is
     * fSAT((fSCALE(N, Rss word * Rtt halfword) + 0x8000) >> 16), with
     * Rtt.h2 for lane 1 and Rtt.h0 for lane 0.  N is 0 (plain) or 1 (:<<1).
     */
    654 #undef mixmpy_sema
    655 #define mixmpy_sema(N)\
    656 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \
    657   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \
    658 }
    659 Q6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    660 Q6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    661 
    /*
     * Non-accumulating vmpywoh with rounding: as the rounding vmpyweh form,
     * but lane 1 multiplies by Rtt.h3 and lane 0 by Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    662 #undef mixmpy_sema
    663 #define mixmpy_sema(N)\
    664 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \
    665   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \
    666 }
    667 Q6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    668 Q6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    669 
    670 
    /*
     * mixmpy_sema(DEST,EQUALS,N) -- vrmpyweh reduction.
     *   DEST   : RddV for the plain forms, RxxV for the accumulating forms.
     *   EQUALS : assignment operator pasted after DEST ('=' or '+=').
     *   N      : fSCALE argument; 0 for the plain syntax, 1 for :<<1.
     * The value assigned is the sum of two scaled fMPY3216SS products,
     * Rss.w1 * Rtt.h2 and Rss.w0 * Rtt.h0.  No shift right and no fSAT is
     * applied in this form.
     */
    671 #undef mixmpy_sema
    672 #define mixmpy_sema(DEST,EQUALS,N)\
    673 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));}
    674 
    675 Q6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
    676 Q6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
    677 Q6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
    678 Q6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
    679 
    /*
     * mixmpy_sema(DEST,EQUALS,N) -- vrmpywoh reduction.
     * Same shape as the vrmpyweh form, but the odd Rtt halfwords are used:
     * DEST EQUALS scaled(Rss.w1 * Rtt.h3) + scaled(Rss.w0 * Rtt.h1).
     * DEST/EQUALS are (RddV,=) or (RxxV,+=); N is 0 (plain) or 1 (:<<1).
     */
    680 #undef mixmpy_sema
    681 #define mixmpy_sema(DEST,EQUALS,N)\
    682 { DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));}
    683 
    684 Q6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
    685 Q6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
    686 Q6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
    687 Q6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
    688 
    689 
    690 
    691 
    692 
    693 
    /*
     * mixmpy_sema(N,H,RND) -- scalar 32x16 multiply into RdV.
     *   N   : fSCALE argument; every user below passes 1 (:<<1).
     *   H   : which halfword of RtV to multiply by (0 for Rt.L, 1 for Rt.H).
     *   RND : text pasted directly after the scaled product, before the >>16;
     *         "+0x8000" for the :rnd forms and empty for the others.
     * Result: RdV = fSAT((fSCALE(N, RsV * Rt halfword H) RND) >> 16).
     */
    694 #undef mixmpy_sema
    695 #define mixmpy_sema(N,H,RND)\
    696 {  RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \
    697 }
    698 Q6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000))
    699 Q6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000))
    700 Q6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,))
    701 Q6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,))
    702 
    703 
    704 
    705 
    706 
    707 
    708 
    709 
    710 
    711 /* SIGNED 32 x UNSIGNED 16 */
    712 
    /*
     * Accumulating vmpyweuh (even halfwords), no rounding.
     * Same structure as the signed vmpyweh accumulate form, but the
     * halfword is read with fGETUHALF and multiplied with fMPY3216SU.
     * Lane 1 uses Rtt.h2, lane 0 uses Rtt.h0; N is 0 (plain) or 1 (:<<1).
     */
    713 #undef mixmpy_sema
    714 #define mixmpy_sema(N)\
    715 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \
    716   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \
    717 }
    718 Q6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    719 Q6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    720 
    /*
     * Accumulating vmpywouh (odd halfwords), no rounding: fMPY3216SU with
     * fGETUHALF; lane 1 uses Rtt.h3, lane 0 uses Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    721 #undef mixmpy_sema
    722 #define mixmpy_sema(N)\
    723 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \
    724   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \
    725 }
    726 Q6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    727 Q6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    728 
    /*
     * Non-accumulating vmpyweuh, no rounding: each word of Rdd is
     * fSAT((fSCALE(N, fMPY3216SU(Rss word, Rtt halfword))) >> 16), the
     * halfword being read with fGETUHALF (h2 for lane 1, h0 for lane 0).
     * N is 0 (plain) or 1 (:<<1).
     */
    729 #undef mixmpy_sema
    730 #define mixmpy_sema(N)\
    731 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \
    732   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \
    733 }
    734 Q6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    735 Q6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    736 
    /*
     * Non-accumulating vmpywouh, no rounding: as the vmpyweuh form, but
     * lane 1 multiplies by Rtt.h3 and lane 0 by Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    737 #undef mixmpy_sema
    738 #define mixmpy_sema(N)\
    739 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \
    740   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \
    741 }
    742 Q6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    743 Q6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    744 
    745 
    746 /* With rounding */
    747 
    /*
     * Accumulating vmpyweuh with rounding: 0x8000 is added to the scaled
     * fMPY3216SU product before the >>16; the shifted value is added to the
     * Rxx lane and the sum goes through fSAT.  Lane 1 uses Rtt.h2, lane 0
     * uses Rtt.h0 (both via fGETUHALF).  N is 0 (plain) or 1 (:<<1).
     */
    748 #undef mixmpy_sema
    749 #define mixmpy_sema(N)\
    750 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \
    751   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \
    752 }
    753 Q6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    754 Q6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    755 
    /*
     * Accumulating vmpywouh with rounding: as the rounding vmpyweuh
     * accumulate form, but lane 1 multiplies by Rtt.h3 and lane 0 by Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    756 #undef mixmpy_sema
    757 #define mixmpy_sema(N)\
    758 { fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \
    759   fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \
    760 }
    761 Q6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    762 Q6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    763 
    /*
     * Non-accumulating vmpyweuh with rounding: each word of Rdd is
     * fSAT((fSCALE(N, fMPY3216SU(Rss word, Rtt halfword)) + 0x8000) >> 16),
     * with Rtt.h2 for lane 1 and Rtt.h0 for lane 0 (via fGETUHALF).
     * N is 0 (plain) or 1 (:<<1).
     */
    764 #undef mixmpy_sema
    765 #define mixmpy_sema(N)\
    766 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \
    767   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \
    768 }
    769 Q6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    770 Q6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    771 
    /*
     * Non-accumulating vmpywouh with rounding: as the rounding vmpyweuh
     * form, but lane 1 multiplies by Rtt.h3 and lane 0 by Rtt.h1.
     * N is 0 (plain) or 1 (:<<1).
     */
    772 #undef mixmpy_sema
    773 #define mixmpy_sema(N)\
    774 { fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \
    775   fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \
    776 }
    777 Q6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
    778 Q6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
    779 
    780 
    781 /**************************************************************/
    782 /* complex mac with full 64-bit accum - no sat, no shift      */
    783 /* either do real or imag, never both                         */
    784 /**************************************************************/
    785 
    /*
     * Rxx += Rss.h1*Rtt.h0 + Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 + Rss.h2*Rtt.h3.
     * All four cross products are formed with fMPY16SS and added straight
     * into RxxV; no fSCALE or fSAT is applied.
     */
    786 Q6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
    787 {
    788 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
    789               fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
    790               fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
    791               fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
    792 })
    793 
    /*
     * Rxx += Rss.h0*Rtt.h0 - Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 - Rss.h3*Rtt.h3.
     * Same-index halfword products (fMPY16SS); the odd-index products are
     * subtracted.  No fSCALE or fSAT is applied.
     */
    794 Q6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real",
    795 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
    796                 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
    797                 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
    798                 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    799 })
    800 
    /*
     * Conjugate form of the imaginary MAC (syntax takes Rtt32 with '*'):
     * Rxx += Rss.h1*Rtt.h0 - Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 - Rss.h2*Rtt.h3.
     * Compared with M2_vrcmaci_s0 the products that use an odd Rtt halfword
     * are subtracted instead of added.
     */
    801 Q6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary",
    802 {
    803 RxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
    804               fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
    805               fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
    806               fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
    807 })
    808 
    /*
     * Conjugate form of the real MAC (syntax takes Rtt32 with '*'):
     * Rxx += Rss.h0*Rtt.h0 + Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 + Rss.h3*Rtt.h3.
     * Compared with M2_vrcmacr_s0 all four products are added.
     */
    809 Q6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real",
    810 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
    811                 fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
    812                 fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
    813                 fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    814 })
    815 
    /*
     * Single-pair imaginary MAC on 32-bit sources:
     * Rxx += Rs.h1*Rt.h0 + Rs.h0*Rt.h1 (fMPY16SS products, no fSCALE/fSAT).
     */
    816 Q6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
    817 {
    818 RxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
    819               fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
    820 })
    821 
    /*
     * Single-pair real MAC on 32-bit sources:
     * Rxx += Rs.h0*Rt.h0 - Rs.h1*Rt.h1 (fMPY16SS products, no fSCALE/fSAT).
     */
    822 Q6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real",
    823 { RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
    824                 fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
    825 })
    826 
    827 
    /*
     * Non-accumulating counterpart of M2_vrcmaci_s0:
     * Rdd = Rss.h1*Rtt.h0 + Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 + Rss.h2*Rtt.h3.
     */
    828 Q6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
    829 {
    830 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
    831        fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
    832        fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
    833        fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
    834 })
    835 
    /*
     * Non-accumulating counterpart of M2_vrcmacr_s0:
     * Rdd = Rss.h0*Rtt.h0 - Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 - Rss.h3*Rtt.h3.
     */
    836 Q6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real",
    837 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
    838          fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
    839          fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
    840          fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    841 })
    842 
    /*
     * Conjugate imaginary multiply (syntax takes Rtt32 with '*'):
     * Rdd = Rss.h1*Rtt.h0 - Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 - Rss.h2*Rtt.h3.
     */
    843 Q6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary",
    844 {
    845 RddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
    846        fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
    847        fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
    848        fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
    849 })
    850 
    /*
     * Conjugate real multiply (syntax takes Rtt32 with '*'):
     * Rdd = Rss.h0*Rtt.h0 + Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 + Rss.h3*Rtt.h3.
     */
    851 Q6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real",
    852 { RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
    853          fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
    854          fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
    855          fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
    856 })
    857 
    /*
     * Single-pair imaginary multiply on 32-bit sources, 64-bit result:
     * Rdd = Rs.h1*Rt.h0 + Rs.h0*Rt.h1.
     */
    858 Q6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
    859 {
    860 RddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
    861        fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
    862 })
    863 
    /*
     * Single-pair real multiply on 32-bit sources, 64-bit result:
     * Rdd = Rs.h0*Rt.h0 - Rs.h1*Rt.h1.
     */
    864 Q6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real",
    865 { RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
    866          fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
    867 })
    868 
    869 
    870 /**************************************************************/
    871 /* Complex mpy/mac with 2x32 bit accum, sat, shift            */
    872 /* 32x16 real or imag                                         */
    873 /**************************************************************/
    874 
    875 #if 1
    876 
    /*
     * Rd = fSAT((Rss.w0*Rt.h1 + Rss.w1*Rt.h0 + 0x4000) >> 15).
     * Products are fMPY3216SS (word of Rss times halfword of Rt); the
     * 0x4000 bias is added before the >>15, matching :rnd in the syntax.
     */
    877 Q6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
    878 {
    879  RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
    880                + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
    881                + 0x4000)>>15);
    882 })
    883 
    884 
    /*
     * Rd = fSAT((Rss.w0*Rt.h0 - Rss.w1*Rt.h1 + 0x4000) >> 15).
     * Products are fMPY3216SS; the 0x4000 bias is added before the >>15.
     */
    885 Q6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
    886 {
    887  RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
    888                - fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
    889                + 0x4000)>>15);
    890 })
    891 
    /*
     * Conjugate form (syntax takes Rt32 with '*'):
     * Rd = fSAT((Rss.w1*Rt.h0 - Rss.w0*Rt.h1 + 0x4000) >> 15).
     */
    892 Q6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
    893 {
    894  RdV = fSAT(  (  fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
    895                - fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
    896                + 0x4000)>>15);
    897 })
    898 
    899 
    /*
     * Conjugate form (syntax takes Rt32 with '*'):
     * Rd = fSAT((Rss.w0*Rt.h0 + Rss.w1*Rt.h1 + 0x4000) >> 15).
     */
    900 Q6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
    901 {
    902  RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
    903                + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
    904                + 0x4000)>>15);
    905 })
    906 
    907 
    908 #endif
    909 
    910 /**************************************************************/
    911 /* Vector mpy/mac with 2x32 bit accum, sat, shift             */
    912 /* either do real or imag,  never both                        */
    913 /**************************************************************/
    914 
    /*
     * VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) -- imaginary part, two word lanes.
     *   DST       : register pair whose words 0 and 1 are written.
     *   ACC0/ACC1 : text pasted in front of the scaled sum for word 0 / 1;
     *               empty, or an "<expr> +" accumulator prefix.
     *   SHIFT     : fSCALE argument applied to the sum of the two products.
     *   SAT       : macro wrapped around the whole lane expression.
     * word 0 <- Rss.h1*Rtt.h0 + Rss.h0*Rtt.h1
     * word 1 <- Rss.h3*Rtt.h2 + Rss.h2*Rtt.h3
     */
    915 #undef VCMPYSEMI
    916 #define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \
    917     fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
    918         fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \
    919     fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
    920         fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV))))); \
    921 
    /*
     * VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) -- real part, two word lanes.
     * Parameters have the same roles as in VCMPYSEMI.
     * word 0 <- Rss.h0*Rtt.h0 - Rss.h1*Rtt.h1
     * word 1 <- Rss.h2*Rtt.h2 - Rss.h3*Rtt.h3
     * The difference is scaled by fSCALE(SHIFT,...), prefixed with
     * ACC0/ACC1 and wrapped in SAT.
     */
    922 #undef VCMPYSEMR
    923 #define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \
    924     fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
    925         fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \
    926     fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
    927         fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))))); \
    928 
    929 
    /*
     * VCMPYIR -- emits one imaginary/real instruction pair per invocation:
     * M2_<TAGBASE>i (vcmpyi, semantics from VCMPYSEMI) and
     * M2_<TAGBASE>r (vcmpyr, semantics from VCMPYSEMR).
     *   TAGBASE           : tag stem; "i" / "r" is appended by token pasting.
     *   DSTSYN, DSTVAL    : destination as syntax text and as semantic operand.
     *   ACCSEM            : syntax text of the assignment ("=" or "+=").
     *   ACCVAL0, ACCVAL1  : accumulator prefixes forwarded as ACC0 / ACC1.
     *   SHIFTSYN,SHIFTVAL : shift as syntax text and as fSCALE argument.
     *   SATSYN, SATVAL    : saturation as syntax text and as wrapping macro.
     * The vcmpyr entry used to carry the description "Vector Complex
     * Multiply Imaginary" (copied from the vcmpyi entry); it now reads
     * "Real" so the description matches the instruction.
     */
    930 #undef VCMPYIR
    931 #define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \
    932 Q6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
    933     "Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \
    934 Q6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
    935     "Vector Complex Multiply Real", { VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); })
    936 
    937 
    /*
     * Instantiations of VCMPYIR; each emits an "...i" and an "...r" tag.
     *  - vcmpy_s0_sat_ : Rdd =, empty accumulator prefixes, shift 0, fSAT.
     *  - vcmpy_s1_sat_ : Rdd =, empty accumulator prefixes, shift 1 (:<<1), fSAT.
     *  - vcmac_s0_sat_ : Rxx +=, each lane prefixed with the matching word of
     *                    RxxV plus '+', shift 0, fSAT.
     */
    938 VCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT)
    939 VCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT)
    940 VCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT)
    941 
    942 
    943 /**********************************************************************
    944  *  Rotation  -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J     *
    945  *********************************************************************/
    946 
    /*
     * Rotates the two halfword pairs of Rss independently.
     * Bits 1:0 of Rt select the rotation for halfwords (0,1); bits 3:2
     * select it for halfwords (2,3).  For a pair (lo,hi) the selector means:
     *   0: (lo, hi)       unchanged
     *   1: (hi, -lo)
     *   2: (-hi, lo)
     *   3: (-lo, -hi)
     * Every negated halfword goes through fSATH before it is stored.
     * tmp is reused for both selectors; the fatal() calls guard the
     * impossible case of a 2-bit field that is not 0..3.
     */
    947 Q6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2",
    948 {
    949     fHIDE(size1u_t tmp;)
    950     tmp = fEXTRACTU_RANGE(RtV,1,0);
    951     if (tmp == 0) { /* No rotation */
    952         fSETHALF(0,RddV,fGETHALF(0,RssV));
    953         fSETHALF(1,RddV,fGETHALF(1,RssV));
    954     } else if (tmp == 1) { /* Multiply by -J */
    955         fSETHALF(0,RddV,fGETHALF(1,RssV));
    956         fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV)));
    957     } else if (tmp == 2) { /* Multiply by J */
    958         fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV)));
    959         fSETHALF(1,RddV,fGETHALF(0,RssV));
    960     } else { /* Multiply by -1 */
    961         fHIDE(if (tmp != 3) fatal("C is broken");)
    962         fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV)));
    963         fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV)));
    964     }
    965     tmp = fEXTRACTU_RANGE(RtV,3,2);
    966     if (tmp == 0) { /* No rotation */
    967         fSETHALF(2,RddV,fGETHALF(2,RssV));
    968         fSETHALF(3,RddV,fGETHALF(3,RssV));
    969     } else if (tmp == 1) { /* Multiply by -J */
    970         fSETHALF(2,RddV,fGETHALF(3,RssV));
    971         fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV)));
    972     } else if (tmp == 2) { /* Multiply by J */
    973         fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV)));
    974         fSETHALF(3,RddV,fGETHALF(2,RssV));
    975     } else { /* Multiply by -1 */
    976         fHIDE(if (tmp != 3) fatal("C is broken");)
    977         fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV)));
    978         fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV)));
    979     }
    980 })
    981 
    982 
    /*
     * Accumulating rotate-and-reduce over the four byte pairs of Rss.
     * control starts as fGETUBYTE(uiV,RtV) (uiV is presumably the #u2
     * immediate from the syntax -- confirm) and supplies one 2-bit selector
     * per pair, lowest bits first (it is shifted right by 2 each iteration).
     * For pair (tmpr,tmpi) = bytes (i, i+1) of Rss the selector means:
     *   0: sumr += tmpr; sumi += tmpi
     *   1: sumr += tmpi; sumi -= tmpr
     *   2: sumr -= tmpi; sumi += tmpr
     *   3: sumr -= tmpr; sumi -= tmpi
     * Finally sumr is added to word 0 of Rxx and sumi to word 1.
     */
    983 Q6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
    984 {
    985     fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
    986     fHIDE(int sumr; int sumi;)
    987     sumr = 0;
    988     sumi = 0;
    989     control = fGETUBYTE(uiV,RtV);
    990     for (i = 0; i < 8; i += 2) {
    991         tmpr = fGETBYTE(i  ,RssV);
    992         tmpi = fGETBYTE(i+1,RssV);
    993         switch (control & 3) {
    994         case 0: /* No Rotation */
    995             sumr += tmpr;
    996             sumi += tmpi;
    997             break;
    998         case 1: /* Multiply by -J */
    999             sumr += tmpi;
   1000             sumi -= tmpr;
   1001             break;
   1002         case 2: /* Multiply by J */
   1003             sumr -= tmpi;
   1004             sumi += tmpr;
   1005             break;
   1006         case 3: /* Multiply by -1 */
   1007             sumr -= tmpr;
   1008             sumi -= tmpi;
   1009             break;
   1010         fHIDE(default: fatal("C is broken!");)
   1011         }
   1012         control = control >> 2;
   1013     }
   1014     fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr);
   1015     fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi);
   1016 })
   1017 
   /*
    * Non-accumulating rotate-and-reduce over the four byte pairs of Rss.
    * The loop is the same as in S4_vrcrotate_acc: control is read with
    * fGETUBYTE(uiV,RtV) and yields one 2-bit selector per byte pair, lowest
    * bits first; each selector chooses how (tmpr,tmpi) is folded into
    * (sumr,sumi).  The only difference is the write-back: word 0 of Rdd is
    * set to sumr and word 1 to sumi, with no previous value added.
    */
   1018 Q6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
   1019 {
   1020     fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
   1021     fHIDE(int sumr; int sumi;)
   1022     sumr = 0;
   1023     sumi = 0;
   1024     control = fGETUBYTE(uiV,RtV);
   1025     for (i = 0; i < 8; i += 2) {
   1026         tmpr = fGETBYTE(i  ,RssV);
   1027         tmpi = fGETBYTE(i+1,RssV);
   1028         switch (control & 3) {
   1029         case 0: /* No Rotation */
   1030             sumr += tmpr;
   1031             sumi += tmpi;
   1032             break;
   1033         case 1: /* Multiply by -J */
   1034             sumr += tmpi;
   1035             sumi -= tmpr;
   1036             break;
   1037         case 2: /* Multiply by J */
   1038             sumr -= tmpi;
   1039             sumi += tmpr;
   1040             break;
   1041         case 3: /* Multiply by -1 */
   1042             sumr -= tmpr;
   1043             sumi -= tmpi;
   1044             break;
   1045         fHIDE(default: fatal("C is broken!");)
   1046         }
   1047         control = control >> 2;
   1048     }
   1049     fSETWORD(0,RddV,sumr);
   1050     fSETWORD(1,RddV,sumi);
   1051 })
   1052 
   1053 
   /*
    * Per-halfword conditional negate.  For i = 0..3, bit i of Rt decides
    * what halfword i of Rdd receives: fSATH(-Rss.h[i]) when the bit is set,
    * otherwise Rss.h[i] unchanged.
    */
   1054 Q6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords",
   1055 {
   1056     fHIDE(int i;)
   1057     for (i = 0; i < 4; i++) {
   1058         if (fGETBIT(i,RtV)) {
   1059             fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV)));
   1060         } else {
   1061             fSETHALF(i,RddV,fGETHALF(i,RssV));
   1062         }
   1063     }
   1064 })
   1065 
   /*
    * Reducing conditional negate.  For i = 0..3, halfword i of Rss is added
    * to the whole of RxxV, negated first when bit i of Rt is set.  Unlike
    * S2_vcnegh, no fSATH is applied to the negated value.
    */
   1066 Q6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords",
   1067 {
   1068     fHIDE(int i;)
   1069     for (i = 0; i < 4; i++) {
   1070         if (fGETBIT(i,RtV)) {
   1071             RxxV += -fGETHALF(i,RssV);
   1072         } else {
   1073             RxxV += fGETHALF(i,RssV);
   1074         }
   1075     }
   1076 })
   1077 
   1078 
   1079 /**********************************************************************
   1080  *  Finite-field multiplies.  Written by David Hoyle                  *
   1081  *********************************************************************/
   1082 
   /*
    * 32x32 polynomial multiply: for every set bit i of y (from Rt), x << i
    * is XORed into prod, and prod is written to Rdd.  x (from Rs) and prod
    * are unsigned long long so that shifts of up to 31 positions keep the
    * bits moved above bit 31.
    */
   1083 Q6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
   1084 {
   1085         fHIDE(int i; unsigned int y;)
   1086         fHIDE(unsigned long long x; unsigned long long prod;)
   1087         x = fGETUWORD(0, RsV);
   1088         y = fGETUWORD(0, RtV);
   1089 
   1090         prod = 0;
   1091         for(i=0; i < 32; i++) {
   1092             if((y >> i) & 1) prod ^= (x << i);
   1093         }
   1094         RddV = prod;
   1095 })
   1096 
   /*
    * Two independent 16x16 polynomial multiplies: prod0 from the low
    * halfwords of Rs and Rt (x0,y0), prod1 from the high halfwords (x1,y1).
    * Each product XORs x << i into the result for every set bit i of y.
    * The results are stored interleaved in Rdd:
    *   h0 = low half of prod0,  h1 = low half of prod1,
    *   h2 = high half of prod0, h3 = high half of prod1.
    */
   1097 Q6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
   1098 {
   1099         fHIDE(int i; unsigned int x0; unsigned int x1;)
   1100         fHIDE(unsigned int y0; unsigned int y1;)
   1101         fHIDE(unsigned int prod0; unsigned int prod1;)
   1102 
   1103         x0 = fGETUHALF(0, RsV);
   1104         x1 = fGETUHALF(1, RsV);
   1105         y0 = fGETUHALF(0, RtV);
   1106         y1 = fGETUHALF(1, RtV);
   1107 
   1108         prod0 = prod1 = 0;
   1109         for(i=0; i < 16; i++) {
   1110             if((y0 >> i) & 1) prod0 ^= (x0 << i);
   1111             if((y1 >> i) & 1) prod1 ^= (x1 << i);
   1112         }
   1113         fSETHALF(0,RddV,fGETUHALF(0,prod0));
   1114         fSETHALF(1,RddV,fGETUHALF(0,prod1));
   1115         fSETHALF(2,RddV,fGETUHALF(1,prod0));
   1116         fSETHALF(3,RddV,fGETUHALF(1,prod1));
   1117 })
   1118 
   /*
    * Accumulating form of M4_pmpyw: the same shift-and-XOR product of
    * Rs and Rt is computed into prod, which is then XORed into Rxx
    * (the syntax uses "^=") instead of being assigned.
    */
   1119 Q6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
   1120 {
   1121         fHIDE(int i; unsigned int y;)
   1122         fHIDE(unsigned long long x; unsigned long long prod;)
   1123         x = fGETUWORD(0, RsV);
   1124         y = fGETUWORD(0, RtV);
   1125 
   1126         prod = 0;
   1127         for(i=0; i < 32; i++) {
   1128             if((y >> i) & 1) prod ^= (x << i);
   1129         }
   1130         RxxV ^= prod;
   1131 })
   1132 
   /*
    * Accumulating form of M4_vpmpyh: prod0 and prod1 are computed the same
    * way, then each halfword of Rxx is XORed with the matching product
    * half, using the same interleaving (h0/h2 from prod0, h1/h3 from prod1).
    */
   1133 Q6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
   1134 {
   1135         fHIDE(int i; unsigned int x0; unsigned int x1;)
   1136         fHIDE(unsigned int y0; unsigned int y1;)
   1137         fHIDE(unsigned int prod0; unsigned int prod1;)
   1138 
   1139         x0 = fGETUHALF(0, RsV);
   1140         x1 = fGETUHALF(1, RsV);
   1141         y0 = fGETUHALF(0, RtV);
   1142         y1 = fGETUHALF(1, RtV);
   1143 
   1144         prod0 = prod1 = 0;
   1145         for(i=0; i < 16; i++) {
   1146             if((y0 >> i) & 1) prod0 ^= (x0 << i);
   1147             if((y1 >> i) & 1) prod1 ^= (x1 << i);
   1148         }
   1149         fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0));
   1150         fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1));
   1151         fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0));
   1152         fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1));
   1153 })
   1154 
   1155 
   1156 /* V70: TINY CORE */
   1157 
   /*
    * CMPY64 -- emits a pair of 64-bit-result instructions per invocation:
    * M7_<TAG> (Rdd = ...) and M7_<TAG>_acc (Rxx += ...).
    *   TAG      : tag stem.
    *   NAME     : mnemonic text placed in the syntax string.
    *   DESC     : text appended to the description ("Real" / "Imag").
    *   OPERAND1 : syntax text of the second operand ("Rtt32" or "Rtt32*").
    *   OP       : '+' or '-' between the two products.
    *   W0..W3   : word indices; the value is
    *              Rss.w[W0]*Rtt.w[W1] OP Rss.w[W2]*Rtt.w[W3] (fMPY32SS).
    * Both instructions carry A_RESTRICT_SLOT3ONLY.
    */
   1158 #define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \
   1159 Q6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC,    { RddV  = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\
   1160 Q6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})
   1161 
   1162 CMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1)
   1163 CMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1)
   1164 CMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0)
   1165 CMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1)
   1166 
   /*
    * CMPY128 -- emits M7_<TAG>: a 32-bit-result complex multiply with
    * :<<1:sat in its syntax, restricted to slot 3.
    *   TAG      : tag stem.
    *   NAME     : mnemonic text placed in the syntax string.
    *   OPERAND1 : syntax text of the second operand ("Rtt32" or "Rtt32*").
    *   WORD0..3 : word indices; tmp128 = Rss.w[WORD0]*Rtt.w[WORD1] and
    *              acc128 = Rss.w[WORD2]*Rtt.w[WORD3] (fMPY32SS, widened with
    *              fCAST8S_16S).
    *   OP       : fADD128 or fSUB128, applied as OP(tmp128,acc128).
    * The combined value is shifted right by 31 with fSHIFTR128, narrowed
    * with fCAST16S_8S and written to RdV through fSATW.
    *
    * The description used to end in "real" although the macro is also
    * instantiated for the imaginary cmpyiw forms; it is now neutral so it
    * is correct for all four instantiations without changing the macro's
    * parameter list.
    */
   1167 #define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
   1168 Q6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result",  \
   1169 { \
   1170 fHIDE(size16s_t acc128;)\
   1171 fHIDE(size16s_t tmp128;)\
   1172 fHIDE(size8s_t acc64;)\
   1173 tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
   1174 acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
   1175 acc128 = OP(tmp128,acc128);\
   1176 acc128 = fSHIFTR128(acc128, 31);\
   1177 acc64 =  fCAST16S_8S(acc128);\
   1178 RdV = fSATW(acc64);\
   1179 })
   1180 
   1181 
   1182 CMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
   1183 CMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
   1184 CMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
   1185 CMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)
   1186 
   1187 
   /*
    * CMPY128RND -- emits M7_<TAG>_rnd: the rounding variant of the
    * 32-bit-result complex multiply (:<<1:rnd:sat), restricted to slot 3.
    *   TAG      : tag stem; "_rnd" is appended by token pasting.
    *   NAME     : mnemonic text placed in the syntax string.
    *   OPERAND1 : syntax text of the second operand ("Rtt32" or "Rtt32*").
    *   WORD0..3 : word indices; tmp128 = Rss.w[WORD0]*Rtt.w[WORD1] and
    *              acc128 = Rss.w[WORD2]*Rtt.w[WORD3] (fMPY32SS, widened with
    *              fCAST8S_16S).
    *   OP       : fADD128 or fSUB128, applied as OP(tmp128,acc128).
    * After OP, the constant 0x40000000 (half of the 1 << 31 that the
    * following shift divides by) is added with fADD128; the value is then
    * shifted right by 31, narrowed with fCAST16S_8S and written to RdV
    * through fSATW.
    *
    * The description used to end in "real" although the macro is also
    * instantiated for the imaginary cmpyiw forms; it is now neutral so it
    * is correct for all four instantiations without changing the macro's
    * parameter list.
    */
   1188 #define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
   1189 Q6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result",  \
   1190 { \
   1191 fHIDE(size16s_t acc128;)\
   1192 fHIDE(size16s_t tmp128;)\
   1193 fHIDE(size16s_t const128;)\
   1194 fHIDE(size8s_t acc64;)\
   1195 tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
   1196 acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
   1197 const128 = fCAST8S_16S(fCONSTLL(0x40000000));\
   1198 acc128 = OP(tmp128,acc128);\
   1199 acc128 = fADD128(acc128,const128);\
   1200 acc128 = fSHIFTR128(acc128, 31);\
   1201 acc64 =  fCAST16S_8S(acc128);\
   1202 RdV = fSATW(acc64);\
   1203 })
   1204 
   1205 CMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
   1206 CMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
   1207 CMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
   1208 CMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)