| 1 | /* hp2100_cpu7.c: HP 1000 VIS and SIGNAL/1000 microcode\r |
| 2 | \r |
| 3 | Copyright (c) 2006, J. David Bryan\r |
| 4 | Copyright (c) 2008, Holger Veit\r |
| 5 | \r |
| 6 | Permission is hereby granted, free of charge, to any person obtaining a\r |
| 7 | copy of this software and associated documentation files (the "Software"),\r |
| 8 | to deal in the Software without restriction, including without limitation\r |
| 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense,\r |
| 10 | and/or sell copies of the Software, and to permit persons to whom the\r |
| 11 | Software is furnished to do so, subject to the following conditions:\r |
| 12 | \r |
| 13 | The above copyright notice and this permission notice shall be included in\r |
| 14 | all copies or substantial portions of the Software.\r |
| 15 | \r |
| 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r |
| 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r |
| 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\r |
| 19 | THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\r |
| 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r |
| 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r |
| 22 | \r |
| 23 | Except as contained in this notice, the name of the authors shall not be\r |
| 24 | used in advertising or otherwise to promote the sale, use or other dealings\r |
| 25 | in this Software without prior written authorization from the authors.\r |
| 26 | \r |
| 27 | CPU7 Vector Instruction Set and SIGNAL firmware\r |
| 28 | \r |
| 29 | 30-Apr-08 JDB Updated SIGNAL code from Holger\r |
| 30 | 24-Apr-08 HV Implemented SIGNAL\r |
| 31 | 20-Apr-08 JDB Updated comments\r |
| 32 | 26-Feb-08 HV Implemented VIS\r |
| 33 | \r |
| 34 | Primary references:\r |
| 35 | - HP 1000 M/E/F-Series Computers Technical Reference Handbook\r |
| 36 | (5955-0282, Mar-1980)\r |
| 37 | - HP 1000 M/E/F-Series Computers Engineering and Reference Documentation\r |
| 38 | (92851-90001, Mar-1981)\r |
| 39 | - Macro/1000 Reference Manual (92059-90001, Dec-1992)\r |
| 40 | \r |
| 41 | Additional references are listed with the associated firmware\r |
| 42 | implementations, as are the HP option model numbers pertaining to the\r |
| 43 | applicable CPUs.\r |
| 44 | */\r |
| 45 | \r |
| 46 | #include "hp2100_defs.h"\r |
| 47 | #include "hp2100_cpu.h"\r |
| 48 | #include "hp2100_cpu1.h"\r |
| 49 | \r |
| 50 | #if defined (HAVE_INT64) /* int64 support available */\r |
| 51 | \r |
| 52 | #include "hp2100_fp1.h"\r |
| 53 | \r |
| 54 | \r |
| 55 | t_stat cpu_vis (uint32 IR, uint32 intrq); /* Vector Instruction Set */\r |
| 56 | t_stat cpu_signal (uint32 IR, uint32 intrq); /* SIGNAL/1000 Instructions */\r |
| 57 | \r |
| 58 | static const OP zero = { { 0, 0, 0, 0, 0 } }; /* DEC 0.0D0 */\r |
| 59 | \r |
| 60 | \r |
| 61 | /* Vector Instruction Set\r |
| 62 | \r |
| 63 | The VIS provides instructions that operate on one-dimensional arrays of\r |
| 64 | floating-point values. Both single- and double-precision operations are\r |
| 65 | supported. VIS uses the F-Series floating-point processor to handle the\r |
| 66 | floating-point math.\r |
| 67 | \r |
| 68 | Option implementation by CPU was as follows:\r |
| 69 | \r |
| 70 | 2114 2115 2116 2100 1000-M 1000-E 1000-F\r |
| 71 | ------ ------ ------ ------ ------ ------ ------\r |
| 72 | N/A N/A N/A N/A N/A N/A 12824A\r |
| 73 | \r |
| 74 | The routines are mapped to instruction codes as follows:\r |
| 75 | \r |
| 76 | Single-Precision Double-Precision\r |
| 77 | Instr. Opcode Subcod Instr. Opcode Subcod Description\r |
| 78 | ------ ------ ------ ------ ------ ------ -----------------------------\r |
| 79 | VADD 101460 000000 DVADD 105460 004002 Vector add\r |
| 80 | VSUB 101460 000020 DVSUB 105460 004022 Vector subtract\r |
| 81 | VMPY 101460 000040 DVMPY 105460 004042 Vector multiply\r |
| 82 | VDIV 101460 000060 DVDIV 105460 004062 Vector divide\r |
| 83 | VSAD 101460 000400 DVSAD 105460 004402 Scalar-vector add\r |
| 84 | VSSB 101460 000420 DVSSB 105460 004422 Scalar-vector subtract\r |
| 85 | VSMY 101460 000440 DVSMY 105460 004442 Scalar-vector multiply\r |
| 86 | VSDV 101460 000460 DVSDV 105460 004462 Scalar-vector divide\r |
| 87 | VPIV 101461 0xxxxx DVPIV 105461 0xxxxx Vector pivot\r |
| 88 | VABS 101462 0xxxxx DVABS 105462 0xxxxx Vector absolute value\r |
| 89 | VSUM 101463 0xxxxx DVSUM 105463 0xxxxx Vector sum\r |
| 90 | VNRM 101464 0xxxxx DVNRM 105464 0xxxxx Vector norm\r |
| 91 | VDOT 101465 0xxxxx DVDOT 105465 0xxxxx Vector dot product\r |
| 92 | VMAX 101466 0xxxxx DVMAX 105466 0xxxxx Vector maximum value\r |
| 93 | VMAB 101467 0xxxxx DVMAB 105467 0xxxxx Vector maximum absolute value\r |
| 94 | VMIN 101470 0xxxxx DVMIN 105470 0xxxxx Vector minimum value\r |
| 95 | VMIB 101471 0xxxxx DVMIB 105471 0xxxxx Vector minimum absolute value\r |
| 96 | VMOV 101472 0xxxxx DVMOV 105472 0xxxxx Vector move\r |
| 97 | VSWP 101473 0xxxxx DVSWP 105473 0xxxxx Vector swap\r |
| 98 | .ERES 101474 -- -- -- -- Resolve array element address\r |
| 99 | .ESEG 101475 -- -- -- -- Load MSEG maps\r |
| 100 | .VSET 101476 -- -- -- -- Vector setup\r |
| 101 | [test] -- -- -- 105477 -- [self test]\r |
| 102 | \r |
| 103 | Instructions use IR bit 11 to select single- or double-precision format. The\r |
| 104 | double-precision instruction names begin with "D" (e.g., DVADD vs. VADD).\r |
| 105 | Most VIS instructions are two words in length, with a sub-opcode immediately\r |
| 106 | following the primary opcode.\r |
| 107 | \r |
| 108 | Notes:\r |
| 109 | \r |
| 110 | 1. The .VECT (101460) and .DVCT (105460) opcodes preface a single- or\r |
| 111 | double-precision arithmetic operation that is determined by the\r |
| 112 | sub-opcode value. The remainder of the dual-precision sub-opcode values\r |
| 113 | are "don't care," except for requiring a zero in bit 15.\r |
| 114 | \r |
| 115 | 2. The VIS uses the hardware FPP of the F-Series. FPP malfunctions are\r |
| 116 | detected by the VIS firmware and are indicated by a memory-protect\r |
| 117 | violation and setting the overflow flag. Under simulation,\r |
| 118 | malfunctions cannot occur.\r |
| 119 | \r |
| 120 | Additional references:\r |
| 121 | - 12824A Vector Instruction Set User's Manual (12824-90001, Jun-1979).\r |
| 122 | - VIS Microcode Source (12824-18059, revision 3).\r |
| 123 | */\r |
| 124 | \r |
| 125 | /* implemented in hp2100_cpu5.c (RTE-IV EMA functions) */\r |
| 126 | extern t_stat cpu_ema_eres(uint32* rtn,uint32 dtbl,uint32 atbl, t_bool debug);\r |
| 127 | extern t_stat cpu_ema_eseg(uint32* rtn,uint32 ir,uint32 tbl, t_bool debug);\r |
| 128 | extern t_stat cpu_ema_vset(uint32* rtn,OPS op, t_bool debug);\r |
| 129 | \r |
| 130 | static const OP_PAT op_vis[16] = {\r |
| 131 | OP_N, OP_AAKAKAKK,OP_AKAKK, OP_AAKK, /* .VECT VPIV VABS VSUM */\r |
| 132 | OP_AAKK, OP_AAKAKK, OP_AAKK, OP_AAKK, /* VNRM VDOT VMAX VMAB */\r |
| 133 | OP_AAKK, OP_AAKK, OP_AKAKK, OP_AKAKK, /* VMIN VMIB VMOV VSWP */\r |
| 134 | OP_AA, OP_A, OP_AAACCC,OP_N /* .ERES .ESEG .VSET [test] */\r |
| 135 | };\r |
| 136 | \r |
| 137 | static const t_bool op_ftnret[16] = {\r |
| 138 | FALSE, TRUE, TRUE, TRUE,\r |
| 139 | TRUE, TRUE, TRUE, TRUE,\r |
| 140 | TRUE, TRUE, TRUE, TRUE,\r |
| 141 | FALSE, TRUE, TRUE, FALSE,\r |
| 142 | };\r |
| 143 | \r |
| 144 | \r |
| 145 | /* handle the scalar/vector base ops */\r |
| 146 | static void vis_svop(uint32 subcode, OPS op, OPSIZE opsize)\r |
| 147 | {\r |
| 148 | OP v1,v2;\r |
| 149 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 150 | OP s = ReadOp(op[0].word,opsize);\r |
| 151 | uint32 v1addr = op[1].word;\r |
| 152 | int16 ix1 = INT16(op[2].word) * delta;\r |
| 153 | uint32 v2addr = op[3].word;\r |
| 154 | int16 ix2 = INT16(op[4].word) * delta;\r |
| 155 | int16 i, n = INT16(op[5].word);\r |
| 156 | uint32 fpuop = (subcode & 060) | (opsize==fp_f ? 0 : 2);\r |
| 157 | \r |
| 158 | if (n <= 0) return;\r |
| 159 | for (i=0; i<n; i++) {\r |
| 160 | v1 = ReadOp(v1addr, opsize);\r |
| 161 | (void)fp_exec(fpuop, &v2, s ,v1);\r |
| 162 | WriteOp(v2addr, v2, opsize);\r |
| 163 | v1addr += ix1;\r |
| 164 | v2addr += ix2;\r |
| 165 | }\r |
| 166 | }\r |
| 167 | \r |
| 168 | /* handle the vector/vector base ops */\r |
| 169 | static void vis_vvop(uint32 subcode, OPS op,OPSIZE opsize)\r |
| 170 | {\r |
| 171 | OP v1,v2,v3;\r |
| 172 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 173 | uint32 v1addr = op[0].word;\r |
| 174 | int32 ix1 = INT16(op[1].word) * delta;\r |
| 175 | uint32 v2addr = op[2].word;\r |
| 176 | int32 ix2 = INT16(op[3].word) * delta;\r |
| 177 | uint32 v3addr = op[4].word;\r |
| 178 | int32 ix3 = INT16(op[5].word) * delta;\r |
| 179 | int16 i, n = INT16(op[6].word);\r |
| 180 | uint32 fpuop = (subcode & 060) | (opsize==fp_f ? 0 : 2);\r |
| 181 | \r |
| 182 | if (n <= 0) return;\r |
| 183 | for (i=0; i<n; i++) {\r |
| 184 | v1 = ReadOp(v1addr, opsize);\r |
| 185 | v2 = ReadOp(v2addr, opsize);\r |
| 186 | (void)fp_exec(fpuop, &v3, v1 ,v2);\r |
| 187 | WriteOp(v3addr, v3, opsize);\r |
| 188 | v1addr += ix1;\r |
| 189 | v2addr += ix2;\r |
| 190 | v3addr += ix3;\r |
| 191 | }\r |
| 192 | }\r |
| 193 | \r |
| 194 | #define GET_MSIGN(op) ((op)->fpk[0] & 0100000)\r |
| 195 | \r |
| 196 | static void vis_abs(OP* in, uint32 opsize)\r |
| 197 | {\r |
| 198 | uint32 sign = GET_MSIGN(in); /* get sign */\r |
| 199 | if (sign) (void)fp_pcom(in, opsize); /* if negative, make positive */\r |
| 200 | }\r |
| 201 | \r |
| 202 | static void vis_minmax(OPS op,OPSIZE opsize,t_bool domax,t_bool doabs)\r |
| 203 | {\r |
| 204 | OP v1,vmxmn,res;\r |
| 205 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 206 | uint32 mxmnaddr = op[0].word;\r |
| 207 | uint32 v1addr = op[1].word;\r |
| 208 | int16 ix1 = INT16(op[2].word) * delta;\r |
| 209 | int16 n = INT16(op[3].word);\r |
| 210 | int16 i,mxmn,sign;\r |
| 211 | int32 subop = 020 | (opsize==fp_f ? 0 : 2);\r |
| 212 | \r |
| 213 | if (n <= 0) return;\r |
| 214 | mxmn = 0; /* index of maxmin element */\r |
| 215 | vmxmn = ReadOp(v1addr,opsize); /* initialize with first element */\r |
| 216 | if (doabs) vis_abs(&vmxmn,opsize); /* ABS(v[1]) if requested */\r |
| 217 | \r |
| 218 | for (i = 0; i<n; i++) {\r |
| 219 | v1 = ReadOp(v1addr,opsize); /* get v[i] */\r |
| 220 | if (doabs) vis_abs(&v1,opsize); /* build ABS(v[i]) if requested */\r |
| 221 | (void)fp_exec(subop,&res,vmxmn,v1); /* subtract vmxmn - v1[i] */\r |
| 222 | sign = GET_MSIGN(&res); /* !=0 if vmxmn < v1[i] */\r |
| 223 | if ((domax && sign) || /* new max value found */\r |
| 224 | (!domax && !sign)) { /* new min value found */\r |
| 225 | mxmn = i;\r |
| 226 | vmxmn = v1; /* save the new max/min value */\r |
| 227 | }\r |
| 228 | v1addr += ix1; /* point to next element */\r |
| 229 | }\r |
| 230 | res.word = mxmn+1; /* adjust to one-based FTN array */\r |
| 231 | WriteOp(mxmnaddr, res, in_s); /* save result */\r |
| 232 | }\r |
| 233 | \r |
| 234 | static void vis_vpiv(OPS op, OPSIZE opsize)\r |
| 235 | {\r |
| 236 | OP s,v1,v2,v3;\r |
| 237 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 238 | uint32 saddr = op[0].word;\r |
| 239 | uint32 v1addr = op[1].word;\r |
| 240 | int16 ix1 = INT16(op[2].word) * delta;\r |
| 241 | uint32 v2addr = op[3].word;\r |
| 242 | int16 ix2 = INT16(op[4].word) * delta;\r |
| 243 | uint32 v3addr = op[5].word;\r |
| 244 | int16 ix3 = INT16(op[6].word) * delta;\r |
| 245 | int16 i, n = INT16(op[7].word);\r |
| 246 | int16 oplen = opsize==fp_f ? 0 : 2;\r |
| 247 | \r |
| 248 | if (n <= 0) return;\r |
| 249 | s = ReadOp(saddr,opsize);\r |
| 250 | /* calculates v3[k] = s * v1[i] + v2[j] for incrementing i,j,k */\r |
| 251 | for (i=0; i<n; i++) {\r |
| 252 | v1 = ReadOp(v1addr, opsize);\r |
| 253 | (void)fp_exec(040+oplen, ACCUM, s ,v1); /* ACCU := s*v1 */\r |
| 254 | v2 = ReadOp(v2addr, opsize);\r |
| 255 | (void)fp_exec(004+oplen,&v3,v2,NOP); /* v3 := v2 + s*v1 */\r |
| 256 | WriteOp(v3addr, v3, opsize); /* write result */\r |
| 257 | v1addr += ix1; /* forward to next array elements */\r |
| 258 | v2addr += ix2;\r |
| 259 | v3addr += ix3;\r |
| 260 | }\r |
| 261 | }\r |
| 262 | \r |
| 263 | static void vis_vabs(OPS op, OPSIZE opsize)\r |
| 264 | {\r |
| 265 | OP v1;\r |
| 266 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 267 | uint32 v1addr = op[0].word;\r |
| 268 | int16 ix1 = INT16(op[1].word) * delta;\r |
| 269 | uint32 v2addr = op[2].word;\r |
| 270 | int32 ix2 = INT16(op[3].word) * delta;\r |
| 271 | int16 i,n = INT16(op[4].word);\r |
| 272 | \r |
| 273 | if (n <= 0) return;\r |
| 274 | /* calculates v2[j] = ABS(v1[i]) for incrementing i,j */\r |
| 275 | for (i=0; i<n; i++) {\r |
| 276 | v1 = ReadOp(v1addr, opsize);\r |
| 277 | vis_abs(&v1,opsize); /* make absolute value */\r |
| 278 | WriteOp(v2addr, v1, opsize); /* write result */\r |
| 279 | v1addr += ix1; /* forward to next array elements */\r |
| 280 | v2addr += ix2;\r |
| 281 | }\r |
| 282 | }\r |
| 283 | \r |
| 284 | static void vis_trunc(OP* out, OP in)\r |
| 285 | {\r |
| 286 | /* Note there is fp_trun(), but this doesn't seem to do the same conversion\r |
| 287 | * as the original code does */\r |
| 288 | out->fpk[0] = in.fpk[0];\r |
| 289 | out->fpk[1] = (in.fpk[1] & 0177400) | (in.fpk[3] & 0377);\r |
| 290 | }\r |
| 291 | \r |
| 292 | static void vis_vsmnm(OPS op,OPSIZE opsize,t_bool doabs)\r |
| 293 | {\r |
| 294 | uint32 fpuop;\r |
| 295 | OP v1,sumnrm = zero;\r |
| 296 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 297 | uint32 saddr = op[0].word;\r |
| 298 | uint32 v1addr = op[1].word;\r |
| 299 | int16 ix1 = INT16(op[2].word) * delta;\r |
| 300 | int16 i,n = INT16(op[3].word);\r |
| 301 | \r |
| 302 | if (n <= 0) return;\r |
| 303 | /* calculates sumnrm = sumnrm + DBLE(v1[i]) resp DBLE(ABS(v1[i])) for incrementing i */\r |
| 304 | for (i=0; i<n; i++) {\r |
| 305 | v1 = ReadOp(v1addr, opsize);\r |
| 306 | if (opsize==fp_f) (void)fp_cvt(&v1,fp_f,fp_t); /* cvt to DBLE(v1) */\r |
| 307 | fpuop = (doabs && GET_MSIGN(&v1)) ? 022 : 002; /* use subtract for NRM && V1<0 */\r |
| 308 | (void)fp_exec(fpuop,&sumnrm, sumnrm, v1); /* accumulate */\r |
| 309 | v1addr += ix1; /* forward to next array elements */\r |
| 310 | }\r |
| 311 | if (opsize==fp_f)\r |
| 312 | (void)vis_trunc(&sumnrm,sumnrm); /* truncate to SNGL(sumnrm) */\r |
| 313 | WriteOp(saddr, sumnrm, opsize); /* write result */\r |
| 314 | }\r |
| 315 | \r |
| 316 | static void vis_vdot(OPS op,OPSIZE opsize)\r |
| 317 | {\r |
| 318 | OP v1,v2,dot = zero;\r |
| 319 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 320 | uint32 daddr = op[0].word;\r |
| 321 | uint32 v1addr = op[1].word;\r |
| 322 | int16 ix1 = INT16(op[2].word) * delta;\r |
| 323 | uint32 v2addr = op[3].word;\r |
| 324 | int16 ix2 = INT16(op[4].word) * delta;\r |
| 325 | int16 i,n = INT16(op[5].word);\r |
| 326 | \r |
| 327 | if (n <= 0) return;\r |
| 328 | /* calculates dot = dot + v1[i]*v2[j] for incrementing i,j */\r |
| 329 | for (i=0; i<n; i++) {\r |
| 330 | v1 = ReadOp(v1addr, opsize);\r |
| 331 | if (opsize==fp_f) (void)fp_cvt(&v1,fp_f,fp_t); /* cvt to DBLE(v1) */\r |
| 332 | v2 = ReadOp(v2addr, opsize);\r |
| 333 | if (opsize==fp_f) (void)fp_cvt(&v2,fp_f,fp_t); /* cvt to DBLE(v2) */\r |
| 334 | (void)fp_exec(042,ACCUM, v1, v2); /* ACCU := v1 * v2 */\r |
| 335 | (void)fp_exec(006,&dot,dot,NOP); /* dot := dot + v1*v2 */\r |
| 336 | v1addr += ix1; /* forward to next array elements */\r |
| 337 | v2addr += ix2;\r |
| 338 | }\r |
| 339 | if (opsize==fp_f)\r |
| 340 | (void)vis_trunc(&dot,dot); /* truncate to SNGL(sumnrm) */\r |
| 341 | WriteOp(daddr, dot, opsize); /* write result */\r |
| 342 | }\r |
| 343 | \r |
| 344 | static void vis_movswp(OPS op, OPSIZE opsize, t_bool doswp)\r |
| 345 | {\r |
| 346 | OP v1,v2;\r |
| 347 | int16 delta = opsize==fp_f ? 2 : 4;\r |
| 348 | uint32 v1addr = op[0].word;\r |
| 349 | int16 ix1 = INT16(op[1].word) * delta;\r |
| 350 | uint32 v2addr = op[2].word;\r |
| 351 | int16 ix2 = INT16(op[3].word) * delta;\r |
| 352 | int16 i,n = INT16(op[4].word);\r |
| 353 | \r |
| 354 | if (n <= 0) return;\r |
| 355 | for (i=0; i<n; i++) {\r |
| 356 | v1 = ReadOp(v1addr, opsize);\r |
| 357 | v2 = ReadOp(v2addr, opsize);\r |
| 358 | WriteOp(v2addr, v1, opsize); /* v2 := v1 */\r |
| 359 | if (doswp) WriteOp(v1addr, v2, opsize); /* v1 := v2 */\r |
| 360 | v1addr += ix1; /* forward to next array elements */\r |
| 361 | v2addr += ix2;\r |
| 362 | }\r |
| 363 | }\r |
| 364 | \r |
| 365 | t_stat cpu_vis (uint32 IR, uint32 intrq)\r |
| 366 | {\r |
| 367 | t_stat reason = SCPE_OK;\r |
| 368 | OPS op;\r |
| 369 | OP ret;\r |
| 370 | uint32 entry, rtn, rtn1 = 0, subcode = 0;\r |
| 371 | OP_PAT pattern;\r |
| 372 | OPSIZE opsize;\r |
| 373 | t_bool debug = DEBUG_PRI (cpu_dev, DEB_VIS);\r |
| 374 | \r |
| 375 | if ((cpu_unit.flags & UNIT_VIS) == 0) /* VIS option installed? */\r |
| 376 | return stop_inst;\r |
| 377 | \r |
| 378 | opsize = (IR & 004000) ? fp_t : fp_f; /* double or single precision */\r |
| 379 | entry = IR & 017; /* mask to entry point */\r |
| 380 | pattern = op_vis[entry];\r |
| 381 | \r |
| 382 | if (entry==0) { /* retrieve sub opcode */\r |
| 383 | ret = ReadOp (PC, in_s); /* get it */\r |
| 384 | subcode = ret.word;\r |
| 385 | if (subcode & 0100000) /* special property of ucode */\r |
| 386 | subcode = AR; /* for reentry */ \r |
| 387 | PC = (PC + 1) & VAMASK; /* bump to real argument list */\r |
| 388 | pattern = (subcode & 0400) ? OP_AAKAKK : OP_AKAKAKK; /* scalar or vector operation */\r |
| 389 | }\r |
| 390 | \r |
| 391 | if (pattern != OP_N)\r |
| 392 | if (op_ftnret[entry]) { /* most VIS instrs ignore RTN addr */\r |
| 393 | ret = ReadOp(PC, in_s);\r |
| 394 | rtn = rtn1 = ret.word; /* but save it just in case */\r |
| 395 | PC = (PC + 1) & VAMASK; /* move to next argument */\r |
| 396 | }\r |
| 397 | if (reason = cpu_ops (pattern, op, intrq)) /* get instruction operands */\r |
| 398 | return reason;\r |
| 399 | \r |
| 400 | if (debug) { /* debugging? */\r |
| 401 | fprintf (sim_deb, ">>CPU VIS: IR = %06o/%06o (", /* print preamble and IR */\r |
| 402 | IR, subcode);\r |
| 403 | fprint_sym (sim_deb, err_PC, (t_value *) &IR, /* print instruction mnemonic */\r |
| 404 | NULL, SWMASK('M'));\r |
| 405 | fprintf (sim_deb, "), P = %06o", err_PC); /* print location */\r |
| 406 | fprint_ops (pattern, op); /* print operands */\r |
| 407 | fputc ('\n', sim_deb); /* terminate line */\r |
| 408 | }\r |
| 409 | \r |
| 410 | switch (entry) { /* decode IR<3:0> */\r |
| 411 | case 000: /* .VECT (OP_special) */\r |
| 412 | if (subcode & 0400)\r |
| 413 | vis_svop(subcode,op,opsize); /* scalar/vector op */\r |
| 414 | else\r |
| 415 | vis_vvop(subcode,op,opsize); /* vector/vector op */\r |
| 416 | break;\r |
| 417 | case 001: /* VPIV (OP_(A)AAKAKAKK) */\r |
| 418 | vis_vpiv(op,opsize);\r |
| 419 | break;\r |
| 420 | case 002: /* VABS (OP_(A)AKAKK) */\r |
| 421 | vis_vabs(op,opsize);\r |
| 422 | break;\r |
| 423 | case 003: /* VSUM (OP_(A)AAKK) */\r |
| 424 | vis_vsmnm(op,opsize,FALSE);\r |
| 425 | break;\r |
| 426 | case 004: /* VNRM (OP_(A)AAKK) */\r |
| 427 | vis_vsmnm(op,opsize,TRUE);\r |
| 428 | break;\r |
| 429 | case 005: /* VDOT (OP_(A)AAKAKK) */\r |
| 430 | vis_vdot(op,opsize);\r |
| 431 | break;\r |
| 432 | case 006: /* VMAX (OP_(A)AAKK) */\r |
| 433 | vis_minmax(op,opsize,TRUE,FALSE);\r |
| 434 | break;\r |
| 435 | case 007: /* VMAB (OP_(A)AAKK) */\r |
| 436 | vis_minmax(op,opsize,TRUE,TRUE);\r |
| 437 | break;\r |
| 438 | case 010: /* VMIN (OP_(A)AAKK) */\r |
| 439 | vis_minmax(op,opsize,FALSE,FALSE);\r |
| 440 | break;\r |
| 441 | case 011: /* VMIB (OP_(A)AAKK) */\r |
| 442 | vis_minmax(op,opsize,FALSE,TRUE);\r |
| 443 | break;\r |
| 444 | case 012: /* VMOV (OP_(A)AKAKK) */\r |
| 445 | vis_movswp(op,opsize,FALSE);\r |
| 446 | break;\r |
| 447 | case 013: /* VSWP (OP_(A)AKAKK) */\r |
| 448 | vis_movswp(op,opsize,TRUE);\r |
| 449 | break;\r |
| 450 | case 014: /* .ERES (OP_(A)AA) */\r |
| 451 | reason = cpu_ema_eres(&rtn,op[2].word,PC,debug); /* handle the ERES instruction */\r |
| 452 | PC = rtn;\r |
| 453 | if (debug)\r |
| 454 | fprintf (sim_deb, \r |
| 455 | ">>CPU VIS: return .ERES: AR = %06o, BR = %06o, rtn=%s\n",\r |
| 456 | AR, BR, PC==op[0].word ? "error" : "good");\r |
| 457 | break;\r |
| 458 | \r |
| 459 | case 015: /* .ESEG (OP_(A)A) */\r |
| 460 | reason = cpu_ema_eseg(&rtn,IR,op[0].word,debug); /* handle the ESEG instruction */ \r |
| 461 | PC = rtn;\r |
| 462 | if (debug)\r |
| 463 | fprintf (sim_deb,\r |
| 464 | ">>CPU VIS: return .ESEG: AR = %06o , BR = %06o, rtn=%s\n",\r |
| 465 | AR, BR, rtn==rtn1 ? "error" : "good");\r |
| 466 | break;\r |
| 467 | \r |
| 468 | case 016: /* .VSET (OP_(A)AAACCC) */\r |
| 469 | reason = cpu_ema_vset(&rtn,op,debug);\r |
| 470 | PC = rtn;\r |
| 471 | if (debug)\r |
| 472 | fprintf (sim_deb, ">>CPU VIS: return .VSET: AR = %06o BR = %06o, rtn=%s\n",\r |
| 473 | AR, BR, \r |
| 474 | rtn==rtn1 ? "error" : (rtn==(rtn1+1) ? "hard" : "easy") );\r |
| 475 | break;\r |
| 476 | \r |
| 477 | case 017: /* [test] (OP_N) */\r |
| 478 | XR = 3; /* firmware revision */\r |
| 479 | SR = 0102077; /* test passed code */\r |
| 480 | PC = (PC + 1) & VAMASK; /* P+2 return for firmware w/VIS */\r |
| 481 | break;\r |
| 482 | default: /* others undefined */\r |
| 483 | reason = stop_inst;\r |
| 484 | }\r |
| 485 | \r |
| 486 | return reason;\r |
| 487 | }\r |
| 488 | \r |
| 489 | \r |
| 490 | /* SIGNAL/1000 Instructions\r |
| 491 | \r |
| 492 | The SIGNAL/1000 instructions provide fast Fourier transforms and complex\r |
| 493 | arithmetic. They utilize the F-Series floating-point processor and the\r |
| 494 | Vector Instruction Set.\r |
| 495 | \r |
| 496 | Option implementation by CPU was as follows:\r |
| 497 | \r |
| 498 | 2114 2115 2116 2100 1000-M 1000-E 1000-F\r |
| 499 | ------ ------ ------ ------ ------ ------ ------\r |
| 500 | N/A N/A N/A N/A N/A N/A 92835A\r |
| 501 | \r |
| 502 | The routines are mapped to instruction codes as follows:\r |
| 503 | \r |
| 504 | Instr. 1000-F Description\r |
| 505 | ------ ------ ----------------------------------------------\r |
| 506 | BITRV 105600 Bit reversal\r |
| 507 | BTRFY 105601 Butterfly algorithm\r |
| 508 | UNSCR 105602 Unscramble for phasor MPY\r |
| 509 | PRSCR 105603 Unscramble for phasor MPY\r |
| 510 | BITR1 105604 Swap two elements in array (alternate format)\r |
| 511 | BTRF1 105605 Butterfly algorithm (alternate format)\r |
| 512 | .CADD 105606 Complex number addition\r |
| 513 | .CSUB 105607 Complex number subtraction\r |
| 514 | .CMPY 105610 Complex number multiplication\r |
| 515 | .CDIV 105611 Complex number division\r |
| 516 | CONJG 105612 Complex conjugate\r |
| 517 | ..CCM 105613 Complex complement\r |
| 518 | AIMAG 105614 Return imaginary part\r |
| 519 | CMPLX 105615 Form complex number\r |
| 520 | [nop] 105616 [no operation]\r |
| 521 | [test] 105617 [self test]\r |
| 522 | \r |
| 523 | Notes:\r |
| 524 | \r |
| 525 | 1. SIGNAL/1000 ROM data are available from Bitsavers.\r |
| 526 | \r |
| 527 | Additional references (documents unavailable):\r |
| 528 | - HP Signal/1000 User Reference and Installation Manual (92835-90002).\r |
| 529 | - SIGNAL/1000 Microcode Source (92835-18075, revision 2).\r |
| 530 | */\r |
| 531 | \r |
| 532 | #define RE(x) (x+0)\r |
| 533 | #define IM(x) (x+2)\r |
| 534 | \r |
| 535 | static const OP_PAT op_signal[16] = {\r |
| 536 | OP_AAKK, OP_AAFFKK, OP_AAFFKK,OP_AAFFKK, /* BITRV BTRFY UNSCR PRSCR */\r |
| 537 | OP_AAAKK, OP_AAAFFKK,OP_AAA, OP_AAA, /* BITR1 BTRF1 .CADD .CSUB */\r |
| 538 | OP_AAA, OP_AAA, OP_AAA, OP_A, /* .CMPY .CDIV CONJG ..CCM */\r |
| 539 | OP_AA, OP_AAFF, OP_N, OP_N /* AIMAG CMPLX --- [test]*/\r |
| 540 | };\r |
| 541 | \r |
| 542 | /* complex addition helper */\r |
| 543 | static void sig_caddsub(uint32 addsub,OPS op)\r |
| 544 | {\r |
| 545 | OP a,b,c,d,p1,p2;\r |
| 546 | \r |
| 547 | a = ReadOp(RE(op[1].word), fp_f); /* read 1st op */\r |
| 548 | b = ReadOp(IM(op[1].word), fp_f);\r |
| 549 | c = ReadOp(RE(op[2].word), fp_f); /* read 2nd op */\r |
| 550 | d = ReadOp(IM(op[2].word), fp_f);\r |
| 551 | (void)fp_exec(addsub,&p1, a, c); /* add real */\r |
| 552 | (void)fp_exec(addsub,&p2, b, d); /* add imag */\r |
| 553 | WriteOp(RE(op[0].word), p1, fp_f); /* write result */\r |
| 554 | WriteOp(IM(op[0].word), p2, fp_f); /* write result */\r |
| 555 | }\r |
| 556 | \r |
| 557 | /* butterfly operation helper */\r |
| 558 | static void sig_btrfy(uint32 re,uint32 im,OP wr,OP wi,uint32 k, uint32 n2)\r |
| 559 | {\r |
| 560 | /*\r |
| 561 | * v(k)-------->o-->o----> v(k)\r |
| 562 | * \ /\r |
| 563 | * x\r |
| 564 | * / \ \r |
| 565 | * v(k+N/2)---->o-->o----> v(k+N/2)\r |
| 566 | * Wn -1\r |
| 567 | * \r |
| 568 | */\r |
| 569 | \r |
| 570 | OP p1,p2,p3,p4;\r |
| 571 | OP v1r = ReadOp(re+k, fp_f); /* read v1 */\r |
| 572 | OP v1i = ReadOp(im+k, fp_f);\r |
| 573 | OP v2r = ReadOp(re+k+n2, fp_f); /* read v2 */\r |
| 574 | OP v2i = ReadOp(im+k+n2, fp_f);\r |
| 575 | \r |
| 576 | /* (p1,p2) := cmul(w,v2) */\r |
| 577 | (void)fp_exec(040, &p1, wr, v2r); /* S7,8 p1 := wr*v2r */\r |
| 578 | (void)fp_exec(040, ACCUM, wi, v2i); /* ACCUM := wi*v2i */\r |
| 579 | (void)fp_exec(024, &p1, p1, NOP); /* S7,S8 p1 := wr*v2r-wi*v2i ==real(w*v2) */\r |
| 580 | (void)fp_exec(040, &p2, wi, v2r); /* S9,10 p2 := wi*v2r */\r |
| 581 | (void)fp_exec(040, ACCUM, wr, v2i); /* ACCUM := wr*v2i */\r |
| 582 | (void)fp_exec(004, &p2, p2, NOP); /* S9,10 p2 := wi*v2r+wr*v2i ==imag(w*v2) */\r |
| 583 | /* v2 := v1 - (p1,p2) */\r |
| 584 | (void)fp_exec(020, &p3, v1r, p1); /* v2r := v1r-real(w*v2) */\r |
| 585 | (void)fp_exec(020, &p4, v1i, p2); /* v2i := v1i-imag(w*v2) */\r |
| 586 | WriteOp(re+k+n2, p3, fp_f); /* write v2r */\r |
| 587 | WriteOp(im+k+n2, p4, fp_f); /* write v2i */\r |
| 588 | /* v1 := v1 + (p1,p2) */\r |
| 589 | (void)fp_exec(0, &p3, v1r, p1); /* v1r := v1r+real(w*v2) */\r |
| 590 | (void)fp_exec(0, &p4, v1i, p2); /* v1i := v1i+imag(w*v2) */\r |
| 591 | WriteOp(re+k, p3, fp_f); /* write v1r */\r |
| 592 | WriteOp(im+k, p4, fp_f); /* write v1i */\r |
| 593 | O = 0;\r |
| 594 | }\r |
| 595 | \r |
| 596 | /* helper for bit reversal\r |
| 597 | * idx is 0-based already */\r |
| 598 | static void sig_bitrev(uint32 re,uint32 im, uint32 idx, uint32 log2n, int sz)\r |
| 599 | {\r |
| 600 | uint32 i, org=idx, rev = 0;\r |
| 601 | OP v1r,v1i,v2r,v2i;\r |
| 602 | \r |
| 603 | for (i=0; i<log2n; i++) { /* swap bits of idx */\r |
| 604 | rev = (rev<<1) | (org & 1); /* into rev */\r |
| 605 | org >>= 1;\r |
| 606 | }\r |
| 607 | \r |
| 608 | if (rev < idx) return; /* avoid swapping same pair twice in loop */\r |
| 609 | \r |
| 610 | idx *= sz; /* adjust for element size */\r |
| 611 | rev *= sz; /* (REAL*4 vs COMPLEX*8) */\r |
| 612 | \r |
| 613 | v1r = ReadOp(re+idx, fp_f); /* read 1st element */ \r |
| 614 | v1i = ReadOp(im+idx, fp_f);\r |
| 615 | v2r = ReadOp(re+rev, fp_f); /* read 2nd element */\r |
| 616 | v2i = ReadOp(im+rev, fp_f);\r |
| 617 | WriteOp(re+idx, v2r, fp_f); /* swap elements */\r |
| 618 | WriteOp(im+idx, v2i, fp_f);\r |
| 619 | WriteOp(re+rev, v1r, fp_f);\r |
| 620 | WriteOp(im+rev, v1i, fp_f);\r |
| 621 | }\r |
| 622 | \r |
| 623 | /* helper for PRSCR/UNSCR */\r |
| 624 | static OP sig_scadd(uint32 oper,t_bool addh, OP a, OP b)\r |
| 625 | {\r |
| 626 | OP r;\r |
| 627 | static const OP plus_half = { { 0040000, 0000000 } }; /* DEC +0.5 */\r |
| 628 | \r |
| 629 | (void)fp_exec(oper,&r,a,b); /* calculate r := a +/- b */\r |
| 630 | if (addh) (void)fp_exec(044,&r,plus_half,NOP); /* if addh set, multiply by 0.5 */\r |
| 631 | return r;\r |
| 632 | }\r |
| 633 | \r |
| 634 | /* complex multiply helper */\r |
| 635 | static void sig_cmul(OP *r, OP *i, OP a, OP b, OP c, OP d)\r |
| 636 | {\r |
| 637 | OP p;\r |
| 638 | (void)fp_exec(040, &p , a, c); /* p := ac */\r |
| 639 | (void)fp_exec(040, ACCUM, b, d); /* ACCUM := bd */\r |
| 640 | (void)fp_exec(024, r, p , NOP); /* real := ac-bd */\r |
| 641 | (void)fp_exec(040, &p, a, d); /* p := ad */\r |
| 642 | (void)fp_exec(040, ACCUM, b, c); /* ACCUM := bc */\r |
| 643 | (void)fp_exec(004, i, p, NOP); /* imag := ad+bc */\r |
| 644 | }\r |
| 645 | \r |
| 646 | t_stat cpu_signal (uint32 IR, uint32 intrq)\r |
| 647 | {\r |
| 648 | t_stat reason = SCPE_OK;\r |
| 649 | OPS op;\r |
| 650 | OP a,b,c,d,p1,p2,p3,p4,m1,m2,wr,wi;\r |
| 651 | uint32 entry, v, idx1, idx2;\r |
| 652 | int32 exc, exd;\r |
| 653 | \r |
| 654 | t_bool debug = DEBUG_PRI (cpu_dev, DEB_SIG);\r |
| 655 | if ((cpu_unit.flags & UNIT_SIGNAL) == 0) /* SIGNAL option installed? */\r |
| 656 | return stop_inst;\r |
| 657 | \r |
| 658 | entry = IR & 017; /* mask to entry point */\r |
| 659 | \r |
| 660 | if (op_signal[entry] != OP_N)\r |
| 661 | if (reason = cpu_ops (op_signal[entry], op, intrq)) /* get instruction operands */\r |
| 662 | return reason;\r |
| 663 | \r |
| 664 | if (debug) { /* debugging? */\r |
| 665 | fprintf (sim_deb, ">>CPU SIG: IR = %06o (", IR); /* print preamble and IR */\r |
| 666 | fprint_sym (sim_deb, err_PC, (t_value *) &IR, /* print instruction mnemonic */\r |
| 667 | NULL, SWMASK('M'));\r |
| 668 | fprintf (sim_deb, "), P = %06o", err_PC); /* print location */\r |
| 669 | fprint_ops (op_signal[entry], op); /* print operands */\r |
| 670 | fputc ('\n', sim_deb); /* terminate line */\r |
| 671 | }\r |
| 672 | \r |
| 673 | switch (entry) { /* decode IR<3:0> */\r |
| 674 | case 000: /* BITRV (OP_AAKK) */\r |
| 675 | /* BITRV\r |
| 676 | * bit reversal for FFT\r |
| 677 | * JSB BITRV\r |
| 678 | * DEF ret(,I) return address\r |
| 679 | * DEF vect,I base address of array\r |
| 680 | * DEF idx,I index bitmap to be reversed (one-based)\r |
| 681 | * DEF nbits,I number of bits of index\r |
| 682 | *\r |
| 683 | * Given a complex*8 vector of nbits (power of 2), this calculates:\r |
| 684 | * swap( vect[idx], vect[rev(idx)]) where rev(i) is the bitreversed value of i */\r |
| 685 | sig_bitrev(op[1].word, op[1].word+2, op[2].word-1, op[3].word, 4);\r |
| 686 | PC = op[0].word & VAMASK;\r |
| 687 | break;\r |
| 688 | \r |
| 689 | case 001: /* BTRFY (OP_AAFFKK) */\r |
| 690 | /* BTRFY - butterfly operation\r |
| 691 | * JSB BTRFY\r |
| 692 | * DEF ret(,I) return address\r |
| 693 | * DEF vect(,I) complex*8 vector\r |
| 694 | * DEF wr,I real part of W\r |
| 695 | * DEF wi,I imag part of W\r |
| 696 | * DEF node,I index of 1st op (1 based)\r |
| 697 | * DEF lmax,I offset to 2nd op (0 based) */\r |
| 698 | sig_btrfy(op[1].word, op[1].word+2,\r |
| 699 | op[2], op[3], \r |
| 700 | 2*(op[4].word-1), 2*op[5].word);\r |
| 701 | PC = op[0].word & VAMASK;\r |
| 702 | break;\r |
| 703 | \r |
| 704 | case 002: /* UNSCR (OP_AAFFKK) */\r |
| 705 | /* UNSCR unscramble for phasor MPY\r |
| 706 | * JSB UNSCR\r |
| 707 | * DEF ret(,I)\r |
| 708 | * DEF vector,I\r |
| 709 | * DEF WR\r |
| 710 | * DEF WI\r |
| 711 | * DEF idx1,I\r |
| 712 | * DEF idx2,I */\r |
| 713 | v = op[1].word;\r |
| 714 | idx1 = 2 * (op[4].word - 1);\r |
| 715 | idx2 = 2 * (op[5].word - 1);\r |
| 716 | wr = op[2]; /* read WR */\r |
| 717 | wi = op[3]; /* read WI */\r |
| 718 | p1 = ReadOp(RE(v + idx1), fp_f); /* S1 VR[idx1] */\r |
| 719 | p2 = ReadOp(RE(v + idx2), fp_f); /* S2 VR[idx2] */\r |
| 720 | p3 = ReadOp(IM(v + idx1), fp_f); /* S9 VI[idx1] */\r |
| 721 | p4 = ReadOp(IM(v + idx2), fp_f); /* S10 VI[idx2] */\r |
| 722 | c = sig_scadd(000, TRUE, p3, p4); /* S5,6 0.5*(p3+p4) */\r |
| 723 | d = sig_scadd(020, TRUE, p2, p1); /* S7,8 0.5*(p2-p1) */\r |
| 724 | sig_cmul(&m1, &m2, wr, wi, c, d); /* (WR,WI) * (c,d) */\r |
| 725 | c = sig_scadd(000, TRUE, p1, p2); /* 0.5*(p1+p2) */\r |
| 726 | d = sig_scadd(020, TRUE, p3, p4); /* 0.5*(p3-p4) */\r |
| 727 | (void)fp_exec(000, &p1, c, m1); /* VR[idx1] := 0.5*(p1+p2) + real(W*(c,d)) */ \r |
| 728 | WriteOp(RE(v + idx1), p1, fp_f); \r |
| 729 | (void)fp_exec(000, &p2, d, m2); /* VI[idx1] := 0.5*(p3-p4) + imag(W*(c,d)) */\r |
| 730 | WriteOp(IM(v + idx1), p2, fp_f); \r |
| 731 | (void)fp_exec(020, &p1, c, m1); /* VR[idx2] := 0.5*(p1+p2) - imag(W*(c,d)) */\r |
| 732 | WriteOp(RE(v + idx2), p1, fp_f); \r |
| 733 | (void)fp_exec(020, &p2, d, m2); /* VI[idx2] := 0.5*(p3-p4) - imag(W*(c,d)) */\r |
| 734 | WriteOp(IM(v + idx2), p2, fp_f); \r |
| 735 | PC = op[0].word & VAMASK;\r |
| 736 | break; \r |
| 737 | \r |
| 738 | case 003: /* PRSCR (OP_AAFFKK) */\r |
| 739 | /* PRSCR unscramble for phasor MPY\r |
| 740 | * JSB PRSCR\r |
| 741 | * DEF ret(,I)\r |
| 742 | * DEF vector,I\r |
| 743 | * DEF WR\r |
| 744 | * DEF WI\r |
| 745 | * DEF idx1,I\r |
| 746 | * DEF idx2,I */\r |
| 747 | v = op[1].word;\r |
| 748 | idx1 = 2 * (op[4].word - 1);\r |
| 749 | idx2 = 2 * (op[5].word - 1);\r |
| 750 | wr = op[2]; /* read WR */\r |
| 751 | wi = op[3]; /* read WI */\r |
| 752 | p1 = ReadOp(RE(v + idx1), fp_f); /* VR[idx1] */\r |
| 753 | p2 = ReadOp(RE(v + idx2), fp_f); /* VR[idx2] */\r |
| 754 | p3 = ReadOp(IM(v + idx1), fp_f); /* VI[idx1] */\r |
| 755 | p4 = ReadOp(IM(v + idx2), fp_f); /* VI[idx2] */\r |
| 756 | c = sig_scadd(020, FALSE, p1, p2); /* p1-p2 */\r |
| 757 | d = sig_scadd(000, FALSE, p3, p4); /* p3+p4 */ \r |
| 758 | sig_cmul(&m1,&m2, wr, wi, c, d); /* (WR,WI) * (c,d) */\r |
| 759 | c = sig_scadd(000, FALSE, p1, p2); /* p1+p2 */\r |
| 760 | d = sig_scadd(020, FALSE, p3,p4); /* p3-p4 */\r |
| 761 | (void)fp_exec(020, &p1, c, m2); /* VR[idx1] := (p1-p2) - imag(W*(c,d)) */\r |
| 762 | WriteOp(RE(v + idx1), p1, fp_f); \r |
| 763 | (void)fp_exec(000, &p2, d, m1); /* VI[idx1] := (p3-p4) + real(W*(c,d)) */ \r |
| 764 | WriteOp(IM(v + idx1), p2, fp_f); \r |
| 765 | (void)fp_exec(000, &p1, c, m2); /* VR[idx2] := (p1+p2) + imag(W*(c,d)) */\r |
| 766 | WriteOp(RE(v + idx2), p1, fp_f); \r |
| 767 | (void)fp_exec(020, &p2, m1, d); /* VI[idx2] := imag(W*(c,d)) - (p3-p4) */\r |
| 768 | WriteOp(IM(v + idx2), p2, fp_f); \r |
| 769 | PC = op[0].word & VAMASK;\r |
| 770 | break;\r |
| 771 | \r |
| 772 | case 004: /* BITR1 (OP_AAAKK) */\r |
| 773 | /* BITR1\r |
| 774 | * bit reversal for FFT, alternative version\r |
| 775 | * JSB BITR1\r |
| 776 | * DEF ret(,I) return address if already swapped\r |
| 777 | * DEF revect,I base address of real vect\r |
| 778 | * DEF imvect,I base address of imag vect\r |
| 779 | * DEF idx,I index bitmap to be reversed (one-based)\r |
| 780 | * DEF nbits,I number of bits of index\r |
| 781 | *\r |
| 782 | * Given a complex*8 vector of nbits (power of 2), this calculates:\r |
| 783 | * swap( vect[idx], vect[rev(idx)]) where rev(i) is the bitreversed value of i\r |
| 784 | *\r |
| 785 | * difference to BITRV is that BITRV uses complex*8, and BITR1 uses separate real*4\r |
| 786 | * vectors for Real and Imag parts */\r |
| 787 | sig_bitrev(op[1].word, op[2].word, op[3].word-1, op[4].word, 2);\r |
| 788 | PC = op[0].word & VAMASK;\r |
| 789 | break;\r |
| 790 | \r |
| 791 | \r |
| 792 | case 005: /* BTRF1 (OP_AAAFFKK) */\r |
| 793 | /* BTRF1 - butterfly operation with real*4 vectors\r |
| 794 | * JSB BTRF1\r |
| 795 | * DEF ret(,I) return address\r |
| 796 | * DEF rvect,I real part of vector\r |
| 797 | * DEF ivect,I imag part of vector\r |
| 798 | * DEF wr,I real part of W\r |
| 799 | * DEF wi,I imag part of W\r |
| 800 | * DEF node,I index (1 based)\r |
| 801 | * DEF lmax,I index (0 based) */\r |
| 802 | sig_btrfy(op[1].word, op[2].word,\r |
| 803 | op[3], op[4],\r |
| 804 | op[5].word-1, op[6].word);\r |
| 805 | PC = op[0].word & VAMASK;\r |
| 806 | break;\r |
| 807 | \r |
| 808 | case 006: /* .CADD (OP_AAA) */\r |
| 809 | /* .CADD Complex addition\r |
| 810 | * JSB .CADD\r |
| 811 | * DEF result,I\r |
| 812 | * DEF oprd1,I\r |
| 813 | * DEF oprd2,I\r |
| 814 | * complex addition is: (a+bi) + (c+di) => (a+c) + (b+d)i */\r |
| 815 | sig_caddsub(000,op);\r |
| 816 | break;\r |
| 817 | \r |
| 818 | case 007: /* .CSUB (OP_AAA) */\r |
| 819 | /* .CSUB Complex subtraction\r |
| 820 | * JSB .CSUB\r |
| 821 | * DEF result,I\r |
| 822 | * DEF oprd1,I\r |
| 823 | * DEF oprd2,I\r |
| 824 | * complex subtraction is: (a+bi) - (c+di) => (a - c) + (b - d)i */\r |
| 825 | sig_caddsub(020,op);\r |
| 826 | break;\r |
| 827 | \r |
| 828 | case 010: /* .CMUL (OP_AAA) */\r |
| 829 | /* .CMPY Complex multiplication\r |
| 830 | * call:\r |
| 831 | * JSB .CMPY\r |
| 832 | * DEF result,I\r |
| 833 | * DEF oprd1,I\r |
| 834 | * DEF oprd2,I\r |
| 835 | * complex multiply is: (a+bi)*(c+di) => (ac-bd) + (ad+bc)i */\r |
| 836 | a = ReadOp(RE(op[1].word), fp_f); /* read 1st op */\r |
| 837 | b = ReadOp(IM(op[1].word), fp_f);\r |
| 838 | c = ReadOp(RE(op[2].word), fp_f); /* read 2nd op */\r |
| 839 | d = ReadOp(IM(op[2].word), fp_f);\r |
| 840 | sig_cmul(&p1, &p2, a, b, c, d);\r |
| 841 | WriteOp(RE(op[0].word), p1, fp_f); /* write real result */\r |
| 842 | WriteOp(IM(op[0].word), p2, fp_f); /* write imag result */\r |
| 843 | break;\r |
| 844 | \r |
| 845 | case 011: /* .CDIV (OP_AAA) */\r |
| 846 | /* .CDIV Complex division\r |
| 847 | * call:\r |
| 848 | * JSB .CDIV\r |
| 849 | * DEF result,I\r |
| 850 | * DEF oprd1,I\r |
| 851 | * DEF oprd2,I\r |
| 852 | * complex division is: (a+bi)/(c+di) => ((ac+bd) + (bc-ad)i)/(c^2+d^2) */\r |
| 853 | a = ReadOp(RE(op[1].word), fp_f); /* read 1st op */\r |
| 854 | b = ReadOp(IM(op[1].word), fp_f);\r |
| 855 | c = ReadOp(RE(op[2].word), fp_f); /* read 2nd op */\r |
| 856 | d = ReadOp(IM(op[2].word), fp_f);\r |
| 857 | (void)fp_unpack (NULL, &exc, c, fp_f); /* get exponents */\r |
| 858 | (void)fp_unpack (NULL, &exd, d, fp_f);\r |
| 859 | if (exc < exd) { /* ensure c/d < 1 */\r |
| 860 | p1 = a; a = c; c = p1; /* swap dividend and divisor */\r |
| 861 | p1 = b; b = d; d = p1;\r |
| 862 | }\r |
| 863 | (void)fp_exec(060, &p1, d, c); /* p1,accu := d/c */\r |
| 864 | (void)fp_exec(044, ACCUM, d, NOP); /* ACCUM := dd/c */\r |
| 865 | (void)fp_exec(004, &p2, c, NOP); /* p2 := c + dd/c */\r |
| 866 | (void)fp_exec(040, ACCUM, b, p1); /* ACCUM := bd/c */\r |
| 867 | (void)fp_exec(004, ACCUM, a, NOP); /* ACCUM := a + bd/c */\r |
| 868 | (void)fp_exec(070, &p3, NOP, p2); /* p3 := (a+bd/c)/(c+dd/c) == (ac+bd)/(cc+dd) */\r |
| 869 | WriteOp(RE(op[0].word), p3, fp_f); /* Write real result */\r |
| 870 | (void)fp_exec(040, ACCUM, a, p1); /* ACCUM := ad/c */\r |
| 871 | (void)fp_exec(030, ACCUM, NOP, b); /* ACCUM := ad/c - b */\r |
| 872 | if (exd < exc) { /* was not swapped? */\r |
| 873 | (void)fp_exec(024, ACCUM, zero, NOP); /* ACCUM := -ACCUM */\r |
| 874 | }\r |
| 875 | (void)fp_exec(070, &p3, NOP, p2); /* p3 := (b-ad/c)/(c+dd/c) == (bc-ad)/cc+dd) */\r |
| 876 | WriteOp(IM(op[0].word), p3, fp_f); /* Write imag result */\r |
| 877 | break;\r |
| 878 | \r |
| 879 | case 012: /* CONJG (OP_AAA) */\r |
| 880 | /* CONJG build A-Bi from A+Bi\r |
| 881 | * call:\r |
| 882 | * JSB CONJG\r |
| 883 | * DEF RTN\r |
| 884 | * DEF res,I result\r |
| 885 | * DEF arg,I input argument */\r |
| 886 | a = ReadOp(RE(op[2].word), fp_f); /* read real */\r |
| 887 | b = ReadOp(IM(op[2].word), fp_f); /* read imag */\r |
| 888 | (void)fp_pcom(&b, fp_f); /* negate imag */\r |
| 889 | WriteOp(RE(op[1].word), a, fp_f); /* write real */\r |
| 890 | WriteOp(IM(op[1].word), b, fp_f); /* write imag */\r |
| 891 | break;\r |
| 892 | \r |
| 893 | case 013: /* ..CCM (OP_A) */\r |
| 894 | /* ..CCM complement complex\r |
| 895 | * call\r |
| 896 | * JSB ..CCM\r |
| 897 | * DEF arg\r |
| 898 | * build (-RE,-IM)\r |
| 899 | */\r |
| 900 | v = op[0].word;\r |
| 901 | a = ReadOp(RE(v), fp_f); /* read real */\r |
| 902 | b = ReadOp(IM(v), fp_f); /* read imag */\r |
| 903 | (void)fp_pcom(&a, fp_f); /* negate real */\r |
| 904 | (void)fp_pcom(&b, fp_f); /* negate imag */\r |
| 905 | WriteOp(RE(v), a, fp_f); /* write real */\r |
| 906 | WriteOp(IM(v), b, fp_f); /* write imag */\r |
| 907 | break;\r |
| 908 | \r |
| 909 | case 014: /* AIMAG (OP_AA) */\r |
| 910 | /* AIMAG return the imaginary part in AB\r |
| 911 | * JSB AIMAG\r |
| 912 | * DEF *+2\r |
| 913 | * DEF cplx(,I)\r |
| 914 | * returns: AB imaginary part of complex number */\r |
| 915 | a = ReadOp(IM(op[1].word), fp_f); /* read imag */\r |
| 916 | AR = a.fpk[0]; /* move MSB to A */\r |
| 917 | BR = a.fpk[1]; /* move LSB to B */\r |
| 918 | break;\r |
| 919 | \r |
| 920 | case 015: /* CMPLX (OP_AFF) */\r |
| 921 | /* CMPLX form a complex number\r |
| 922 | * JSB CMPLX\r |
| 923 | * DEF *+4\r |
| 924 | * DEF result,I complex number\r |
| 925 | * DEF repart,I real value\r |
| 926 | * DEF impart,I imaginary value */\r |
| 927 | WriteOp(RE(op[1].word), op[2], fp_f); /* write real part */ \r |
| 928 | WriteOp(IM(op[1].word), op[3], fp_f); /* write imag part */\r |
| 929 | break;\r |
| 930 | \r |
| 931 | case 017: /* [slftst] (OP_N) */\r |
| 932 | XR = 2; /* firmware revision */\r |
| 933 | SR = 0102077; /* test passed code */\r |
| 934 | PC = (PC + 1) & VAMASK; /* P+2 return for firmware w/SIGNAL1000 */\r |
| 935 | break;\r |
| 936 | \r |
| 937 | case 016: /* invalid */\r |
| 938 | default: /* others undefined */\r |
| 939 | reason = stop_inst;\r |
| 940 | }\r |
| 941 | \r |
| 942 | return reason;\r |
| 943 | }\r |
| 944 | \r |
| 945 | #endif /* end of int64 support */\r |