| 1 | /* pdp10_mdfp.c: PDP-10 multiply/divide and floating point simulator\r |
| 2 | \r |
| 3 | Copyright (c) 1993-2005, Robert M Supnik\r |
| 4 | \r |
| 5 | Permission is hereby granted, free of charge, to any person obtaining a\r |
| 6 | copy of this software and associated documentation files (the "Software"),\r |
| 7 | to deal in the Software without restriction, including without limitation\r |
| 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,\r |
| 9 | and/or sell copies of the Software, and to permit persons to whom the\r |
| 10 | Software is furnished to do so, subject to the following conditions:\r |
| 11 | \r |
| 12 | The above copyright notice and this permission notice shall be included in\r |
| 13 | all copies or substantial portions of the Software.\r |
| 14 | \r |
| 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r |
| 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r |
| 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\r |
| 18 | ROBERT M SUPNIK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\r |
| 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r |
| 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r |
| 21 | \r |
| 22 | Except as contained in this notice, the name of Robert M Supnik shall not be\r |
| 23 | used in advertising or otherwise to promote the sale, use or other dealings\r |
| 24 | in this Software without prior written authorization from Robert M Supnik.\r |
| 25 | \r |
| 26 | 2-Apr-04 RMS Fixed bug in floating point unpack\r |
| 27 | Fixed bug in FIXR (found by Phil Stone, fixed by\r |
| 28 | Chris Smith)\r |
| 29 | 31-Aug-01 RMS Changed int64 to t_int64 for Windoze\r |
| 30 | 10-Aug-01 RMS Removed register in declarations\r |
| 31 | \r |
| 32 | Instructions handled in this module:\r |
| 33 | imul integer multiply\r |
| 34 | idiv integer divide\r |
| 35 | mul multiply\r |
| 36 | div divide\r |
| 37 | dmul double precision multiply\r |
| 38 | ddiv double precision divide\r |
| 39 | fad(r) floating add (and round)\r |
| 40 | fsb(r) floating subtract (and round)\r |
| 41 | fmp(r) floating multiply (and round)\r |
| 42 | fdv(r) floating divide and round\r |
| 43 | fsc floating scale\r |
| 44 | fix(r) floating to fixed (and round)\r |
| 45 | fltr fixed to floating and round\r |
| 46 | dfad double precision floating add/subtract\r |
| 47 | dfmp double precision floating multiply\r |
| 48 | dfdv double precision floating divide\r |
| 49 | \r |
| 50 | The PDP-10 stores double (quad) precision integers in sequential\r |
| 51 | AC's or memory locations. Integers are stored in 2's complement\r |
| 52 | form. Only the sign of the high order word matters; the signs\r |
| 53 | in low order words are ignored on input and set to the sign of\r |
| 54 | the result on output. Quad precision integers exist only in the\r |
| 55 | AC's as the result of a DMUL or the dividend of a DDIV.\r |
| 56 | \r |
| 57 | 0 00000000011111111112222222222333333\r |
| 58 | 0 12345678901234567890123456789012345\r |
| 59 | +-+-----------------------------------+\r |
| 60 | |S| high order integer | AC(n), A\r |
| 61 | +-+-----------------------------------+\r |
| 62 | |S| low order integer | AC(n + 1), A + 1\r |
| 63 | +-+-----------------------------------+\r |
| 64 | |S| low order integer | AC(n + 2)\r |
| 65 | +-+-----------------------------------+\r |
| 66 | |S| low order integer | AC(n + 3)\r |
| 67 | +-+-----------------------------------+\r |
| 68 | \r |
| 69 | The PDP-10 supports two floating point formats: single and double\r |
| 70 | precision. In both, the exponent is 8 bits, stored in excess\r |
| 71 | 128 notation. The fraction is expected to be normalized. A\r |
| 72 | single precision floating point number has 27 bits of fraction;\r |
| 73 | a double precision number has 62 bits of fraction (the sign\r |
| 74 | bit of the second word is ignored and is set to zero).\r |
| 75 | \r |
| 76 | In a negative floating point number, the exponent is stored in\r |
| 77 | one's complement form, the fraction in two's complement form.\r |
| 78 | \r |
| 79 | 0 00000000 011111111112222222222333333\r |
| 80 | 0 12345678 901234567890123456789012345\r |
| 81 | +-+--------+---------------------------+\r |
| 82 | |S|exponent| high order fraction | AC(n), A\r |
| 83 | +-+--------+---------------------------+\r |
| 84 | |0| low order fraction | AC(n + 1), A + 1\r |
| 85 | +-+------------------------------------+\r |
| 86 | \r |
| 87 | Note that treatment of the sign is different for double precision\r |
| 88 | integers and double precision floating point. DMOVN (implemented\r |
| 89 | as an inline macro) follows floating point conventions.\r |
| 90 | \r |
| 91 | The original PDP-10 CPU (KA10) used a different format for double\r |
| 92 | precision numbers and included certain instructions to make\r |
| 93 | software support easier. These instructions were phased out in\r |
| 94 | the KL10 and KS10 and are treated as MUUO's.\r |
| 95 | \r |
| 96 | The KL10 added extended precision (11-bit exponent) floating point\r |
| 97 | format (so-called G floating). These instructions were not\r |
| 98 | implemented in the KS10 and are treated as MUUO's.\r |
| 99 | */\r |
| 100 | \r |
| 101 | #include "pdp10_defs.h"\r |
| 102 | #include <setjmp.h>\r |
| 103 | \r |
| 104 | typedef struct { /* unpacked fp number */\r |
| 105 | int32 sign; /* sign */\r |
| 106 | int32 exp; /* exponent */\r |
| 107 | t_uint64 fhi; /* fraction high */\r |
| 108 | t_uint64 flo; /* for double prec */\r |
| 109 | } UFP;\r |
| 110 | \r |
| 111 | #define MSK32 0xFFFFFFFF\r |
| 112 | #define FIT27 (DMASK - 0x07FFFFFF)\r |
| 113 | #define FIT32 (DMASK - MSK32)\r |
| 114 | #define SFRC TRUE /* frac 2's comp */\r |
| 115 | #define AFRC FALSE /* frac abs value */\r |
| 116 | \r |
| 117 | /* In packed floating point number */\r |
| 118 | \r |
| 119 | #define FP_BIAS 0200 /* exponent bias */\r |
| 120 | #define FP_N_FHI 27 /* # of hi frac bits */\r |
| 121 | #define FP_V_FHI 0 /* must be zero */\r |
| 122 | #define FP_M_FHI 0000777777777\r |
| 123 | #define FP_N_EXP 8 /* # of exp bits */\r |
| 124 | #define FP_V_EXP (FP_V_FHI + FP_N_FHI)\r |
| 125 | #define FP_M_EXP 0377\r |
| 126 | #define FP_V_SIGN (FP_V_EXP + FP_N_EXP) /* sign */\r |
| 127 | #define FP_N_FLO 35 /* # of lo frac bits */\r |
| 128 | #define FP_V_FLO 0 /* must be zero */\r |
| 129 | #define FP_M_FLO 0377777777777\r |
| 130 | #define GET_FPSIGN(x) ((int32) (((x) >> FP_V_SIGN) & 1))\r |
| 131 | #define GET_FPEXP(x) ((int32) (((x) >> FP_V_EXP) & FP_M_EXP))\r |
| 132 | #define GET_FPHI(x) ((x) & FP_M_FHI)\r |
| 133 | #define GET_FPLO(x) ((x) & FP_M_FLO)\r |
| 134 | \r |
| 135 | /* In unpacked floating point number */\r |
| 136 | \r |
| 137 | #define FP_N_GUARD 1 /* # of guard bits */\r |
| 138 | #define FP_V_UFLO FP_N_GUARD /* <35:1> */\r |
| 139 | #define FP_V_URNDD (FP_V_UFLO - 1) /* dp round bit */\r |
| 140 | #define FP_V_UFHI (FP_V_UFLO + FP_N_FLO) /* <62:36> */\r |
| 141 | #define FP_V_URNDS (FP_V_UFHI - 1) /* sp round bit */\r |
| 142 | #define FP_V_UCRY (FP_V_UFHI + FP_N_FHI) /* <63> */\r |
| 143 | #define FP_V_UNORM (FP_V_UCRY - 1) /* normalized bit */\r |
| 144 | #define FP_UFHI 0x7FFFFFF000000000\r |
| 145 | #define FP_UFLO 0x0000000FFFFFFFFE\r |
| 146 | #define FP_UFRAC 0x7FFFFFFFFFFFFFFE\r |
| 147 | #define FP_URNDD 0x0000000000000001\r |
| 148 | #define FP_URNDS 0x0000000800000000\r |
| 149 | #define FP_UNORM 0x4000000000000000\r |
| 150 | #define FP_UCRY 0x8000000000000000\r |
| 151 | #define FP_ONES 0xFFFFFFFFFFFFFFFF\r |
| 152 | \r |
| 153 | #define UNEG(x) ((~x) + 1)\r |
| 154 | #define DUNEG(x) x.flo = UNEG (x.flo); x.fhi = ~x.fhi + (x.flo == 0)\r |
| 155 | \r |
| 156 | extern d10 *ac_cur; /* current AC block */\r |
| 157 | extern int32 flags; /* flags */\r |
| 158 | void mul (d10 a, d10 b, d10 *rs);\r |
| 159 | void funpack (d10 h, d10 l, UFP *r, t_bool sgn);\r |
| 160 | void fnorm (UFP *r, t_int64 rnd);\r |
| 161 | d10 fpack (UFP *r, d10 *lo, t_bool fdvneg);\r |
| 162 | \r |
| 163 | /* Integer multiply - checked against KS-10 ucode */\r |
| 164 | \r |
| 165 | d10 imul (d10 a, d10 b)\r |
| 166 | {\r |
| 167 | d10 rs[2];\r |
| 168 | \r |
| 169 | if ((a == SIGN) && (b == SIGN)) { /* KS10 hack */\r |
| 170 | SETF (F_AOV | F_T1); /* -2**35 squared */\r |
| 171 | return SIGN;\r |
| 172 | }\r |
| 173 | mul (a, b, rs); /* mpy, dprec result */\r |
| 174 | if (rs[0] && (rs[0] != ONES)) { /* high not all sign? */\r |
| 175 | rs[1] = TSTS (a ^ b)? SETS (rs[1]): CLRS (rs[1]); /* set sign */\r |
| 176 | SETF (F_AOV | F_T1); /* overflow */\r |
| 177 | }\r |
| 178 | return rs[1];\r |
| 179 | }\r |
| 180 | \r |
| 181 | /* Integer divide, return quotient, remainder - checked against KS10 ucode\r |
| 182 | The KS10 does not recognize -2^35/-1 as an error. Instead, it produces\r |
| 183 | 2^35 (that is, -2^35) as the incorrect result.\r |
| 184 | */\r |
| 185 | \r |
| 186 | t_bool idiv (d10 a, d10 b, d10 *rs)\r |
| 187 | {\r |
| 188 | d10 dvd = ABS (a); /* make ops positive */\r |
| 189 | d10 dvr = ABS (b);\r |
| 190 | \r |
| 191 | if (dvr == 0) { /* divide by 0? */\r |
| 192 | SETF (F_DCK | F_AOV | F_T1); /* set flags, return */\r |
| 193 | return FALSE;\r |
| 194 | }\r |
| 195 | rs[0] = dvd / dvr; /* get quotient */\r |
| 196 | rs[1] = dvd % dvr; /* get remainder */\r |
| 197 | if (TSTS (a ^ b)) rs[0] = NEG (rs[0]); /* sign of result */\r |
| 198 | if (TSTS (a)) rs[1] = NEG (rs[1]); /* sign of remainder */\r |
| 199 | return TRUE;\r |
| 200 | }\r |
| 201 | \r |
| 202 | /* Multiply, return double precision result - checked against KS10 ucode */\r |
| 203 | \r |
| 204 | void mul (d10 s1, d10 s2, d10 *rs)\r |
| 205 | {\r |
| 206 | t_uint64 a = ABS (s1);\r |
| 207 | t_uint64 b = ABS (s2);\r |
| 208 | t_uint64 t, u, r;\r |
| 209 | \r |
| 210 | if ((a == 0) || (b == 0)) { /* operand = 0? */\r |
| 211 | rs[0] = rs[1] = 0; /* result 0 */\r |
| 212 | return;\r |
| 213 | }\r |
| 214 | if ((a & FIT32) || (b & FIT32)) { /* fit in 64b? */\r |
| 215 | t = a >> 18; /* no, split in half */\r |
| 216 | a = a & RMASK; /* "dp" multiply */\r |
| 217 | u = b >> 18;\r |
| 218 | b = b & RMASK;\r |
| 219 | r = (a * b) + (((a * u) + (b * t)) << 18); /* low is only 35b */\r |
| 220 | rs[0] = ((t * u) << 1) + (r >> 35); /* so lsh hi 1 */\r |
| 221 | rs[1] = r & MMASK;\r |
| 222 | }\r |
| 223 | else {\r |
| 224 | r = a * b; /* fits, native mpy */\r |
| 225 | rs[0] = r >> 35; /* split at bit 35 */\r |
| 226 | rs[1] = r & MMASK;\r |
| 227 | }\r |
| 228 | \r |
| 229 | if (TSTS (s1 ^ s2)) { MKDNEG (rs); } /* result -? */\r |
| 230 | else if (TSTS (rs[0])) { /* result +, 2**70? */\r |
| 231 | SETF (F_AOV | F_T1); /* overflow */\r |
| 232 | rs[1] = SETS (rs[1]); /* consistent - */\r |
| 233 | }\r |
| 234 | return;\r |
| 235 | }\r |
| 236 | \r |
| 237 | /* Divide, return quotient and remainder - checked against KS10 ucode\r |
| 238 | Note that the initial divide check catches the case -2^70/-2^35;\r |
| 239 | thus, the quotient can have at most 35 bits.\r |
| 240 | */\r |
| 241 | \r |
| 242 | t_bool divi (int32 ac, d10 b, d10 *rs)\r |
| 243 | {\r |
| 244 | int32 p1 = ADDAC (ac, 1);\r |
| 245 | d10 dvr = ABS (b); /* make divr positive */\r |
| 246 | t_int64 t;\r |
| 247 | int32 i;\r |
| 248 | d10 dvd[2];\r |
| 249 | \r |
| 250 | dvd[0] = AC(ac); /* divd high */\r |
| 251 | dvd[1] = CLRS (AC(p1)); /* divd lo, clr sgn */\r |
| 252 | if (TSTS (AC(ac))) { DMOVN (dvd); } /* make divd positive */\r |
| 253 | if (dvd[0] >= dvr) { /* divide fail? */\r |
| 254 | SETF (F_AOV | F_DCK | F_T1); /* set flags, return */\r |
| 255 | return FALSE;\r |
| 256 | }\r |
| 257 | if (dvd[0] & FIT27) { /* fit in 63b? */\r |
| 258 | for (i = 0, rs[0] = 0; i < 35; i++) { /* 35 quotient bits */\r |
| 259 | dvd[0] = (dvd[0] << 1) | ((dvd[1] >> 34) & 1);\r |
| 260 | dvd[1] = (dvd[1] << 1) & MMASK; /* shift dividend */\r |
| 261 | rs[0] = rs[0] << 1; /* shift quotient */\r |
| 262 | if (dvd[0] >= dvr) { /* subtract work? */\r |
| 263 | dvd[0] = dvd[0] - dvr; /* quo bit is 1 */\r |
| 264 | rs[0] = rs[0] + 1;\r |
| 265 | }\r |
| 266 | }\r |
| 267 | rs[1] = dvd[0]; /* store remainder */\r |
| 268 | }\r |
| 269 | else {\r |
| 270 | t = (dvd[0] << 35) | dvd[1]; /* concatenate */\r |
| 271 | rs[0] = t / dvr; /* quotient */\r |
| 272 | rs[1] = t % dvr; /* remainder */\r |
| 273 | }\r |
| 274 | if (TSTS (AC(ac) ^ b)) rs[0] = NEG (rs[0]); /* sign of result */\r |
| 275 | if (TSTS (AC(ac))) rs[1] = NEG (rs[1]); /* sign of remainder */\r |
| 276 | return TRUE;\r |
| 277 | }\r |
| 278 | \r |
| 279 | /* Double precision multiply. This is done the old fashioned way. Cross\r |
| 280 | product multiplies would be a lot faster but would require more code.\r |
| 281 | */\r |
| 282 | \r |
| 283 | void dmul (int32 ac, d10 *mpy)\r |
| 284 | {\r |
| 285 | int32 p1 = ADDAC (ac, 1);\r |
| 286 | int32 p2 = ADDAC (ac, 2);\r |
| 287 | int32 p3 = ADDAC (ac, 3);\r |
| 288 | int32 i;\r |
| 289 | d10 mpc[2], sign;\r |
| 290 | \r |
| 291 | mpc[0] = AC(ac); /* mplcnd hi */\r |
| 292 | mpc[1] = CLRS (AC(p1)); /* mplcnd lo, clr sgn */\r |
| 293 | sign = mpc[0] ^ mpy[0]; /* sign of result */\r |
| 294 | if (TSTS (mpc[0])) { DMOVN (mpc); } /* get abs (mpcnd) */\r |
| 295 | if (TSTS (mpy[0])) { DMOVN (mpy); } /* get abs (mpyer) */\r |
| 296 | else mpy[1] = CLRS (mpy[1]); /* clear mpy lo sign */\r |
| 297 | AC(ac) = AC(p1) = AC(p2) = AC(p3) = 0; /* clear AC's */\r |
| 298 | if (((mpy[0] | mpy[1]) == 0) || ((mpc[0] | mpc[1]) == 0)) return;\r |
| 299 | for (i = 0; i < 71; i++) { /* 71 mpyer bits */\r |
| 300 | if (i) { /* shift res, mpy */\r |
| 301 | AC(p3) = (AC(p3) >> 1) | ((AC(p2) & 1) << 34);\r |
| 302 | AC(p2) = (AC(p2) >> 1) | ((AC(p1) & 1) << 34);\r |
| 303 | AC(p1) = (AC(p1) >> 1) | ((AC(ac) & 1) << 34);\r |
| 304 | AC(ac) = AC(ac) >> 1;\r |
| 305 | mpy[1] = (mpy[1] >> 1) | ((mpy[0] & 1) << 34);\r |
| 306 | mpy[0] = mpy[0] >> 1;\r |
| 307 | }\r |
| 308 | if (mpy[1] & 1) { /* if mpy lo bit = 1 */\r |
| 309 | AC(p1) = AC(p1) + mpc[1];\r |
| 310 | AC(ac) = AC(ac) + mpc[0] + (TSTS (AC(p1) != 0));\r |
| 311 | AC(p1) = CLRS (AC(p1));\r |
| 312 | }\r |
| 313 | }\r |
| 314 | if (TSTS (sign)) { /* result minus? */\r |
| 315 | AC(p3) = (-AC(p3)) & MMASK; /* quad negate */\r |
| 316 | AC(p2) = (~AC(p2) + (AC(p3) == 0)) & MMASK;\r |
| 317 | AC(p1) = (~AC(p1) + (AC(p2) == 0)) & MMASK;\r |
| 318 | AC(ac) = (~AC(ac) + (AC(p1) == 0)) & DMASK;\r |
| 319 | }\r |
| 320 | else if (TSTS (AC(ac))) SETF (F_AOV | F_T1); /* wrong sign */\r |
| 321 | if (TSTS (AC(ac))) { /* if result - */\r |
| 322 | AC(p1) = SETS (AC(p1)); /* make signs consistent */\r |
| 323 | AC(p2) = SETS (AC(p2));\r |
| 324 | AC(p3) = SETS (AC(p3));\r |
| 325 | }\r |
| 326 | return;\r |
| 327 | }\r |
| 328 | \r |
| 329 | /* Double precision divide - checked against KS10 ucode */\r |
| 330 | \r |
| 331 | void ddiv (int32 ac, d10 *dvr)\r |
| 332 | {\r |
| 333 | int32 i, cryin;\r |
| 334 | d10 sign, qu[2], dvd[4];\r |
| 335 | \r |
| 336 | dvd[0] = AC(ac); /* save dividend */\r |
| 337 | for (i = 1; i < 4; i++) dvd[i] = CLRS (AC(ADDAC (ac, i)));\r |
| 338 | sign = AC(ac) ^ dvr[0]; /* sign of result */\r |
| 339 | if (TSTS (AC(ac))) { /* get abs (dividend) */\r |
| 340 | for (i = 3, cryin = 1; i > 0; i--) { /* negate quad */\r |
| 341 | dvd[i] = (~dvd[i] + cryin) & MMASK; /* comp + carry in */\r |
| 342 | if (dvd[i]) cryin = 0; /* next carry in */\r |
| 343 | }\r |
| 344 | dvd[0] = (~dvd[0] + cryin) & DMASK;\r |
| 345 | }\r |
| 346 | if (TSTS (dvr[0])) { DMOVN (dvr); } /* get abs (divisor) */\r |
| 347 | else dvr[1] = CLRS (dvr[1]);\r |
| 348 | if (DCMPGE (dvd, dvr)) { /* will divide work? */\r |
| 349 | SETF (F_AOV | F_DCK | F_T1); /* no, set flags */\r |
| 350 | return;\r |
| 351 | }\r |
| 352 | qu[0] = qu[1] = 0; /* clear quotient */\r |
| 353 | for (i = 0; i < 70; i++) { /* 70 quotient bits */\r |
| 354 | dvd[0] = ((dvd[0] << 1) | ((dvd[1] >> 34) & 1)) & DMASK;;\r |
| 355 | dvd[1] = ((dvd[1] << 1) | ((dvd[2] >> 34) & 1)) & MMASK;\r |
| 356 | dvd[2] = ((dvd[2] << 1) | ((dvd[3] >> 34) & 1)) & MMASK;\r |
| 357 | dvd[3] = (dvd[3] << 1) & MMASK; /* shift dividend */\r |
| 358 | qu[0] = (qu[0] << 1) | ((qu[1] >> 34) & 1); /* shift quotient */\r |
| 359 | qu[1] = (qu[1] << 1) & MMASK;\r |
| 360 | if (DCMPGE (dvd, dvr)) { /* subtract work? */\r |
| 361 | dvd[0] = dvd[0] - dvr[0] - (dvd[1] < dvr[1]);\r |
| 362 | dvd[1] = (dvd[1] - dvr[1]) & MMASK; /* do subtract */\r |
| 363 | qu[1] = qu[1] + 1; /* set quotient bit */\r |
| 364 | }\r |
| 365 | }\r |
| 366 | if (TSTS (sign) && (qu[0] | qu[1])) { MKDNEG (qu); }\r |
| 367 | if (TSTS (AC(ac)) && (dvd[0] | dvd[1])) { MKDNEG (dvd); }\r |
| 368 | AC(ac) = qu[0]; /* quotient */\r |
| 369 | AC(ADDAC(ac, 1)) = qu[1];\r |
| 370 | AC(ADDAC(ac, 2)) = dvd[0]; /* remainder */\r |
| 371 | AC(ADDAC(ac, 3)) = dvd[1];\r |
| 372 | return;\r |
| 373 | }\r |
| 374 | \r |
| 375 | /* Single precision floating add/subtract - checked against KS10 ucode\r |
| 376 | The KS10 shifts the smaller operand regardless of the exponent diff.\r |
| 377 | This code will not shift more than 63 places; shifts beyond that\r |
| 378 | cannot change the value of the smaller operand.\r |
| 379 | \r |
| 380 | If the signs of the operands are the same, the result sign is the\r |
| 381 | same as the source sign; the sign of the result fraction is actually\r |
| 382 | part of the data. If the signs of the operands are different, the\r |
| 383 | result sign is determined by the fraction sign.\r |
| 384 | */\r |
| 385 | \r |
| 386 | d10 fad (d10 op1, d10 op2, t_bool rnd, int32 inv)\r |
| 387 | {\r |
| 388 | int32 ediff;\r |
| 389 | UFP a, b, t;\r |
| 390 | \r |
| 391 | if (inv) op2 = NEG (op2); /* subtract? -b */\r |
| 392 | if (op1 == 0) funpack (op2, 0, &a, AFRC); /* a = 0? result is b */\r |
| 393 | else if (op2 == 0) funpack (op1, 0, &a, AFRC); /* b = 0? result is a */\r |
| 394 | else {\r |
| 395 | funpack (op1, 0, &a, SFRC); /* unpack operands */\r |
| 396 | funpack (op2, 0, &b, SFRC); /* fracs are 2's comp */\r |
| 397 | ediff = a.exp - b.exp; /* get exp diff */\r |
| 398 | if (ediff < 0) { /* a < b? switch */\r |
| 399 | t = a;\r |
| 400 | a = b;\r |
| 401 | b = t;\r |
| 402 | ediff = -ediff;\r |
| 403 | }\r |
| 404 | if (ediff > 63) ediff = 63; /* cap diff at 63 */\r |
| 405 | if (ediff) b.fhi = (t_int64) b.fhi >> ediff; /* shift b (signed) */\r |
| 406 | a.fhi = a.fhi + b.fhi; /* add fractions */\r |
| 407 | if (a.sign ^ b.sign) { /* add or subtract? */\r |
| 408 | if (a.fhi & FP_UCRY) { /* subtract, frac -? */\r |
| 409 | a.fhi = UNEG (a.fhi); /* complement result */\r |
| 410 | a.sign = 1; /* result is - */\r |
| 411 | }\r |
| 412 | else a.sign = 0; /* result is + */\r |
| 413 | }\r |
| 414 | else {\r |
| 415 | if (a.sign) a.fhi = UNEG (a.fhi); /* add, src -? comp */\r |
| 416 | if (a.fhi & FP_UCRY) { /* check for carry */\r |
| 417 | a.fhi = a.fhi >> 1; /* flo won't be used */\r |
| 418 | a.exp = a.exp + 1;\r |
| 419 | }\r |
| 420 | }\r |
| 421 | }\r |
| 422 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r |
| 423 | return fpack (&a, NULL, FALSE);\r |
| 424 | }\r |
| 425 | \r |
| 426 | /* Single precision floating multiply. Because the fractions are 27b,\r |
| 427 | a 64b multiply can be used for the fraction multiply. The 27b\r |
| 428 | fractions are positioned 0'frac'0000, resulting in 00'hifrac'0..0.\r |
| 429 | The extra 0 is accounted for by biasing the result exponent.\r |
| 430 | */\r |
| 431 | \r |
| 432 | #define FP_V_SPM (FP_V_UFHI - (32 - FP_N_FHI - 1))\r |
| 433 | d10 fmp (d10 op1, d10 op2, t_bool rnd)\r |
| 434 | {\r |
| 435 | UFP a, b;\r |
| 436 | \r |
| 437 | funpack (op1, 0, &a, AFRC); /* unpack operands */\r |
| 438 | funpack (op2, 0, &b, AFRC); /* fracs are abs val */\r |
| 439 | if ((a.fhi == 0) || (b.fhi == 0)) return 0; /* either 0? */\r |
| 440 | a.sign = a.sign ^ b.sign; /* result sign */\r |
| 441 | a.exp = a.exp + b.exp - FP_BIAS + 1; /* result exponent */\r |
| 442 | a.fhi = (a.fhi >> FP_V_SPM) * (b.fhi >> FP_V_SPM); /* high 27b of result */\r |
| 443 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r |
| 444 | return fpack (&a, NULL, FALSE);\r |
| 445 | }\r |
| 446 | \r |
| 447 | /* Single precision floating divide. Because the fractions are 27b, a\r |
| 448 | 64b divide can be used for the fraction divide. Note that 28b-29b\r |
| 449 | of fraction are developed; the code will do one special normalize to\r |
| 450 | make sure that the 28th bit is not lost. Also note the special\r |
| 451 | treatment of negative quotients with non-zero remainders; this\r |
| 452 | implements the note on p2-23 of the Processor Reference Manual.\r |
| 453 | */\r |
| 454 | \r |
| 455 | t_bool fdv (d10 op1, d10 op2, d10 *rs, t_bool rnd)\r |
| 456 | {\r |
| 457 | UFP a, b;\r |
| 458 | t_uint64 savhi;\r |
| 459 | t_bool rem = FALSE;\r |
| 460 | \r |
| 461 | funpack (op1, 0, &a, AFRC); /* unpack operands */\r |
| 462 | funpack (op2, 0, &b, AFRC); /* fracs are abs val */\r |
| 463 | if (a.fhi >= 2 * b.fhi) { /* will divide work? */\r |
| 464 | SETF (F_AOV | F_DCK | F_FOV | F_T1);\r |
| 465 | return FALSE;\r |
| 466 | }\r |
| 467 | if (savhi = a.fhi) { /* dvd = 0? quo = 0 */\r |
| 468 | a.sign = a.sign ^ b.sign; /* result sign */\r |
| 469 | a.exp = a.exp - b.exp + FP_BIAS + 1; /* result exponent */\r |
| 470 | a.fhi = a.fhi / (b.fhi >> (FP_N_FHI + 1)); /* do divide */\r |
| 471 | if (a.sign && (savhi != (a.fhi * (b.fhi >> (FP_N_FHI + 1)))))\r |
| 472 | rem = TRUE; /* KL/KS hack */\r |
| 473 | a.fhi = a.fhi << (FP_V_UNORM - FP_N_FHI - 1); /* put quo in place */\r |
| 474 | if ((a.fhi & FP_UNORM) == 0) { /* normalize 1b */\r |
| 475 | a.fhi = a.fhi << 1; /* before masking */\r |
| 476 | a.exp = a.exp - 1;\r |
| 477 | }\r |
| 478 | a.fhi = a.fhi & (FP_UFHI | FP_URNDS); /* mask quo to 28b */\r |
| 479 | }\r |
| 480 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r |
| 481 | *rs = fpack (&a, NULL, rem); /* pack result */\r |
| 482 | return TRUE;\r |
| 483 | }\r |
| 484 | \r |
| 485 | /* Single precision floating scale. */\r |
| 486 | \r |
| 487 | d10 fsc (d10 val, a10 ea)\r |
| 488 | {\r |
| 489 | int32 sc = LIT8 (ea);\r |
| 490 | UFP a;\r |
| 491 | \r |
| 492 | if (val == 0) return 0;\r |
| 493 | funpack (val, 0, &a, AFRC); /* unpack operand */\r |
| 494 | if (ea & RSIGN) a.exp = a.exp - sc; /* adjust exponent */\r |
| 495 | else a.exp = a.exp + sc;\r |
| 496 | fnorm (&a, 0); /* renormalize */\r |
| 497 | return fpack (&a, NULL, FALSE); /* pack result */\r |
| 498 | }\r |
| 499 | \r |
| 500 | /* Float integer operand and round */\r |
| 501 | \r |
| 502 | d10 fltr (d10 mb)\r |
| 503 | {\r |
| 504 | UFP a;\r |
| 505 | d10 val = ABS (mb);\r |
| 506 | \r |
| 507 | a.sign = GET_FPSIGN (mb); /* get sign */\r |
| 508 | a.exp = FP_BIAS + 36; /* initial exponent */\r |
| 509 | a.fhi = val << (FP_V_UNORM - 35); /* left justify op */\r |
| 510 | a.flo = 0;\r |
| 511 | fnorm (&a, FP_URNDS); /* normalize, round */\r |
| 512 | return fpack (&a, NULL, FALSE); /* pack result */\r |
| 513 | }\r |
| 514 | \r |
| 515 | /* Fix and truncate/round floating operand */\r |
| 516 | \r |
| 517 | void fix (int32 ac, d10 mb, t_bool rnd)\r |
| 518 | {\r |
| 519 | int32 sc;\r |
| 520 | t_uint64 so;\r |
| 521 | UFP a;\r |
| 522 | \r |
| 523 | funpack (mb, 0, &a, AFRC); /* unpack operand */\r |
| 524 | if (a.exp > (FP_BIAS + FP_N_FHI + FP_N_EXP)) SETF (F_AOV | F_T1);\r |
| 525 | else if (a.exp < FP_BIAS) AC(ac) = 0; /* < 1/2? */\r |
| 526 | else {\r |
| 527 | sc = FP_V_UNORM - (a.exp - FP_BIAS) + 1;\r |
| 528 | AC(ac) = a.fhi >> sc;\r |
| 529 | if (rnd) {\r |
| 530 | so = a.fhi << (64 - sc);\r |
| 531 | if (so >= (0x8000000000000000 + a.sign)) AC(ac) = AC(ac) + 1;\r |
| 532 | }\r |
| 533 | if (a.sign) AC(ac) = NEG (AC(ac));\r |
| 534 | }\r |
| 535 | return;\r |
| 536 | }\r |
| 537 | \r |
| 538 | /* Double precision floating add/subtract\r |
| 539 | Since a.flo is 0, adding b.flo is just a copy - this is incorporated into\r |
| 540 | the denormalization step. If there's no denormalization, bflo is zero too.\r |
| 541 | */\r |
| 542 | \r |
| 543 | void dfad (int32 ac, d10 *rs, int32 inv)\r |
| 544 | {\r |
| 545 | int32 p1 = ADDAC (ac, 1);\r |
| 546 | int32 ediff;\r |
| 547 | UFP a, b, t;\r |
| 548 | \r |
| 549 | if (inv) { DMOVN (rs); } /* subtract? -b */\r |
| 550 | if ((AC(ac) | AC(p1)) == 0) funpack (rs[0], rs[1], &a, AFRC);\r |
| 551 | /* a == 0? sum = b */\r |
| 552 | else if ((rs[0] | rs[1]) == 0) funpack (AC(ac), AC(p1), &a, AFRC);\r |
| 553 | /* b == 0? sum = a */\r |
| 554 | else {\r |
| 555 | funpack (AC(ac), AC(p1), &a, SFRC); /* unpack operands */\r |
| 556 | funpack (rs[0], rs[1], &b, SFRC);\r |
| 557 | ediff = a.exp - b.exp; /* get exp diff */\r |
| 558 | if (ediff < 0) { /* a < b? switch */\r |
| 559 | t = a;\r |
| 560 | a = b;\r |
| 561 | b = t;\r |
| 562 | ediff = -ediff;\r |
| 563 | }\r |
| 564 | if (ediff > 127) ediff = 127; /* cap diff at 127 */\r |
| 565 | if (ediff > 63) { /* diff > 63? */\r |
| 566 | a.flo = (t_int64) b.fhi >> (ediff - 64); /* b hi to a lo */\r |
| 567 | b.fhi = b.sign? FP_ONES: 0; /* hi = all sign */\r |
| 568 | }\r |
| 569 | else if (ediff) { /* diff <= 63 */\r |
| 570 | a.flo = (b.flo >> ediff) | (b.fhi << (64 - ediff));\r |
| 571 | b.fhi = (t_int64) b.fhi >> ediff; /* shift b (signed) */\r |
| 572 | }\r |
| 573 | a.fhi = a.fhi + b.fhi; /* do add */\r |
| 574 | if (a.sign ^ b.sign) { /* add or subtract? */\r |
| 575 | if (a.fhi & FP_UCRY) { /* subtract, frac -? */\r |
| 576 | DUNEG (a); /* complement result */\r |
| 577 | a.sign = 1; /* result is - */\r |
| 578 | }\r |
| 579 | else a.sign = 0; /* result is + */\r |
| 580 | }\r |
| 581 | else {\r |
| 582 | if (a.sign) { DUNEG (a); }; /* add, src -? comp */\r |
| 583 | if (a.fhi & FP_UCRY) { /* check for carry */\r |
| 584 | a.fhi = a.fhi >> 1; /* flo won't be used */\r |
| 585 | a.exp = a.exp + 1;\r |
| 586 | }\r |
| 587 | }\r |
| 588 | }\r |
| 589 | fnorm (&a, FP_URNDD); /* normalize, round */\r |
| 590 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r |
| 591 | return;\r |
| 592 | }\r |
| 593 | \r |
| 594 | /* Double precision floating multiply\r |
| 595 | The 62b fractions are multiplied, with cross products, to produce a\r |
| 596 | 124b fraction with two leading and two trailing 0's. Because the\r |
| 597 | product has 2 leading 0's, instead of the normal 1, an extra\r |
| 598 | normalization step is needed. Accordingly, the exponent calculation\r |
| 599 | increments the result exponent, to compensate for normalization.\r |
| 600 | */\r |
| 601 | \r |
| 602 | void dfmp (int32 ac, d10 *rs)\r |
| 603 | {\r |
| 604 | int32 p1 = ADDAC (ac, 1);\r |
| 605 | t_uint64 xh, xl, yh, yl, mid;\r |
| 606 | UFP a, b;\r |
| 607 | \r |
| 608 | funpack (AC(ac), AC(p1), &a, AFRC); /* unpack operands */\r |
| 609 | funpack (rs[0], rs[1], &b, AFRC);\r |
| 610 | if ((a.fhi == 0) || (b.fhi == 0)) { /* either 0? result 0 */\r |
| 611 | AC(ac) = AC(p1) = 0;\r |
| 612 | return;\r |
| 613 | }\r |
| 614 | a.sign = a.sign ^ b.sign; /* result sign */\r |
| 615 | a.exp = a.exp + b.exp - FP_BIAS + 1; /* result exponent */\r |
| 616 | xh = a.fhi >> 32; /* split 62b fracs */\r |
| 617 | xl = a.fhi & MSK32; /* into 32b halves */\r |
| 618 | yh = b.fhi >> 32;\r |
| 619 | yl = b.fhi & MSK32;\r |
| 620 | a.fhi = xh * yh; /* hi xproduct */\r |
| 621 | a.flo = xl * yl; /* low xproduct */\r |
| 622 | mid = (xh * yl) + (yh * xl); /* fits in 64b */\r |
| 623 | a.flo = a.flo + (mid << 32); /* add mid lo to lo */\r |
| 624 | a.fhi = a.fhi + ((mid >> 32) & MSK32) + (a.flo < (mid << 32));\r |
| 625 | fnorm (&a, FP_URNDD); /* normalize, round */\r |
| 626 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r |
| 627 | return;\r |
| 628 | }\r |
| 629 | \r |
| 630 | /* Double precision floating divide\r |
| 631 | This algorithm develops a full 62 bits of quotient, plus one rounding\r |
| 632 | bit, in the low order 63b of a 64b number. To do this, we must assure\r |
| 633 | that the initial divide step generates a 1. If it would fail, shift\r |
| 634 | the dividend left and decrement the result exponent accordingly.\r |
| 635 | */\r |
| 636 | \r |
| 637 | void dfdv (int32 ac, d10 *rs)\r |
| 638 | {\r |
| 639 | int32 p1 = ADDAC (ac, 1);\r |
| 640 | int32 i;\r |
| 641 | t_uint64 qu = 0;\r |
| 642 | UFP a, b;\r |
| 643 | \r |
| 644 | funpack (AC(ac), AC(p1), &a, AFRC); /* unpack operands */\r |
| 645 | funpack (rs[0], rs[1], &b, AFRC);\r |
| 646 | if (a.fhi >= 2 * b.fhi) { /* will divide work? */\r |
| 647 | SETF (F_AOV | F_DCK | F_FOV | F_T1);\r |
| 648 | return;\r |
| 649 | }\r |
| 650 | if (a.fhi) { /* dvd = 0? quo = 0 */\r |
| 651 | a.sign = a.sign ^ b.sign; /* result sign */\r |
| 652 | a.exp = a.exp - b.exp + FP_BIAS + 1; /* result exponent */\r |
| 653 | if (a.fhi < b.fhi) { /* make sure initial */\r |
| 654 | a.fhi = a.fhi << 1; /* divide step will work */\r |
| 655 | a.exp = a.exp - 1;\r |
| 656 | }\r |
| 657 | for (i = 0; i < 63; i++) { /* 63b of quotient */\r |
| 658 | qu = qu << 1; /* shift quotient */\r |
| 659 | if (a.fhi >= b.fhi) { /* will div work? */\r |
| 660 | a.fhi = a.fhi - b.fhi; /* sub, quo = 1 */\r |
| 661 | qu = qu + 1;\r |
| 662 | }\r |
| 663 | a.fhi = a.fhi << 1; /* shift dividend */\r |
| 664 | }\r |
| 665 | a.fhi = qu;\r |
| 666 | }\r |
| 667 | fnorm (&a, FP_URNDD); /* normalize, round */\r |
| 668 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r |
| 669 | return;\r |
| 670 | }\r |
| 671 | \r |
| 672 | /* Unpack floating point operand */\r |
| 673 | \r |
| 674 | void funpack (d10 h, d10 l, UFP *r, t_bool sgn)\r |
| 675 | {\r |
| 676 | d10 fphi, fplo;\r |
| 677 | \r |
| 678 | r->sign = GET_FPSIGN (h);\r |
| 679 | r->exp = GET_FPEXP (h);\r |
| 680 | fphi = GET_FPHI (h);\r |
| 681 | fplo = GET_FPLO (l);\r |
| 682 | r->fhi = (fphi << FP_V_UFHI) | (fplo << FP_V_UFLO);\r |
| 683 | r->flo = 0;\r |
| 684 | if (r->sign) {\r |
| 685 | r->exp = r->exp ^ FP_M_EXP; /* 1s comp exp */\r |
| 686 | if (sgn) { /* signed frac? */\r |
| 687 | if (r->fhi) r->fhi = r->fhi | FP_UCRY; /* extend sign */ \r |
| 688 | else {\r |
| 689 | r->exp = r->exp + 1;\r |
| 690 | r->fhi = FP_UCRY | FP_UNORM;\r |
| 691 | }\r |
| 692 | }\r |
| 693 | else { /* abs frac */\r |
| 694 | if (r->fhi) r->fhi = UNEG (r->fhi) & FP_UFRAC;\r |
| 695 | else {\r |
| 696 | r->exp = r->exp + 1;\r |
| 697 | r->fhi = FP_UNORM;\r |
| 698 | }\r |
| 699 | }\r |
| 700 | }\r |
| 701 | return;\r |
| 702 | }\r |
| 703 | \r |
| 704 | /* Normalize and optionally round floating point operand */\r |
| 705 | \r |
| 706 | void fnorm (UFP *a, t_int64 rnd)\r |
| 707 | {\r |
| 708 | int32 i;\r |
| 709 | static t_uint64 normmask[6] = {\r |
| 710 | 0x6000000000000000, 0x7800000000000000, 0x7F80000000000000,\r |
| 711 | 0x7FFF800000000000, 0x7FFFFFFF80000000, 0x7FFFFFFFFFFFFFFF\r |
| 712 | };\r |
| 713 | static int32 normtab[7] = { 1, 2, 4, 8, 16, 32, 63 };\r |
| 714 | extern a10 pager_PC;\r |
| 715 | \r |
| 716 | if (a->fhi & FP_UCRY) { /* carry set? */\r |
| 717 | printf ("%%PDP-10 FP: carry bit set at normalization, PC = %o\n", pager_PC);\r |
| 718 | a->flo = (a->flo >> 1) | ((a->fhi & 1) << 63); /* try to recover */\r |
| 719 | a->fhi = a->fhi >> 1; /* but root cause */\r |
| 720 | a->exp = a->exp + 1; /* should be fixed! */\r |
| 721 | }\r |
| 722 | if ((a->fhi | a->flo) == 0) { /* if fraction = 0 */\r |
| 723 | a->sign = a->exp = 0; /* result is 0 */\r |
| 724 | return;\r |
| 725 | }\r |
| 726 | while ((a->fhi & FP_UNORM) == 0) { /* normalized? */\r |
| 727 | for (i = 0; i < 6; i++) {\r |
| 728 | if (a->fhi & normmask[i]) break;\r |
| 729 | }\r |
| 730 | a->fhi = (a->fhi << normtab[i]) | (a->flo >> (64 - normtab[i]));\r |
| 731 | a->flo = a->flo << normtab[i];\r |
| 732 | a->exp = a->exp - normtab[i];\r |
| 733 | }\r |
| 734 | if (rnd) { /* rounding? */\r |
| 735 | a->fhi = a->fhi + rnd; /* add round const */\r |
| 736 | if (a->fhi & FP_UCRY) { /* if carry out, */\r |
| 737 | a->fhi = a->fhi >> 1; /* renormalize */\r |
| 738 | a->exp = a->exp + 1;\r |
| 739 | }\r |
| 740 | }\r |
| 741 | return;\r |
| 742 | }\r |
| 743 | \r |
| 744 | /* Pack floating point result */\r |
| 745 | \r |
| 746 | d10 fpack (UFP *r, d10 *lo, t_bool fdvneg)\r |
| 747 | {\r |
| 748 | d10 val[2];\r |
| 749 | \r |
| 750 | if (r->exp < 0) SETF (F_AOV | F_FOV | F_FXU | F_T1);\r |
| 751 | else if (r->exp > FP_M_EXP) SETF (F_AOV | F_FOV | F_T1);\r |
| 752 | val[0] = (((((d10) r->exp) & FP_M_EXP) << FP_V_EXP) |\r |
| 753 | ((r->fhi & FP_UFHI) >> FP_V_UFHI)) & DMASK;\r |
| 754 | if (lo) val[1] = ((r->fhi & FP_UFLO) >> FP_V_UFLO) & MMASK;\r |
| 755 | else val[1] = 0;\r |
| 756 | if (r->sign) { /* negate? */\r |
| 757 | if (fdvneg) { /* fdvr special? */\r |
| 758 | val[1] = ~val[1] & MMASK; /* 1's comp */\r |
| 759 | val[0] = ~val[0] & DMASK;\r |
| 760 | }\r |
| 761 | else { DMOVN (val); } /* 2's comp */\r |
| 762 | }\r |
| 763 | if (lo) *lo = val[1];\r |
| 764 | return val[0];\r |
| 765 | }\r |