| 1 | /* hp2100_fp1.c: HP 1000 multiple-precision floating point routines\r |
| 2 | \r |
| 3 | Copyright (c) 2005-2008, J. David Bryan\r |
| 4 | \r |
| 5 | Permission is hereby granted, free of charge, to any person obtaining a\r |
| 6 | copy of this software and associated documentation files (the "Software"),\r |
| 7 | to deal in the Software without restriction, including without limitation\r |
| 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,\r |
| 9 | and/or sell copies of the Software, and to permit persons to whom the\r |
| 10 | Software is furnished to do so, subject to the following conditions:\r |
| 11 | \r |
| 12 | The above copyright notice and this permission notice shall be included in\r |
| 13 | all copies or substantial portions of the Software.\r |
| 14 | \r |
| 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r |
| 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r |
| 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\r |
| 18 | THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\r |
| 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r |
| 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r |
| 21 | \r |
| 22 | Except as contained in this notice, the name of the author shall not be used\r |
| 23 | in advertising or otherwise to promote the sale, use or other dealings in\r |
| 24 | this Software without prior written authorization from the author.\r |
| 25 | \r |
| 26 | 10-May-08 JDB Fixed uninitialized return in fp_accum when setting\r |
| 27 | 19-Mar-08 JDB Reworked "complement" to avoid inlining bug in gcc-4.x\r |
| 28 | 01-Dec-06 JDB Reworked into generalized multiple-precision ops for FPP\r |
| 29 | 12-Oct-06 JDB Altered x_trun for F-Series FFP compatibility\r |
| 30 | Added F-Series ..TCM FFP helpers\r |
| 31 | \r |
| 32 | Primary references:\r |
| 33 | - HP 1000 M/E/F-Series Computers Engineering and Reference Documentation\r |
| 34 | (92851-90001, Mar-1981)\r |
| 35 | - HP 1000 M/E/F-Series Computers Technical Reference Handbook\r |
| 36 | (5955-0282, Mar-1980)\r |
| 37 | - DOS/RTE Relocatable Library Reference Manual\r |
| 38 | (24998-90001, Oct-1981)\r |
| 39 | \r |
| 40 | \r |
| 41 | This module implements multiple-precision floating-point operations to\r |
| 42 | support the 1000 F-Series hardware Floating Point Processor. It employs\r |
| 43 | 64-bit integer arithmetic for speed and simplicity of implementation. The\r |
| 44 | host compiler must support 64-bit integers, and the HAVE_INT64 symbol must be\r |
| 45 | defined during compilation. If this symbol is not defined, then FPP support\r |
| 46 | is not available.\r |
| 47 | \r |
| 48 | HP 2100/1000 computers used a proprietary floating-point format. The 2100\r |
| 49 | had optional firmware that provided two-word floating-point add, subtract,\r |
| 50 | multiply, and divide, as well as single-integer fix and float. The 1000-M/E\r |
| 51 | provided the same two-word firmware operations as standard equipment.\r |
| 52 | Three-word extended-precision instructions for the 2100 and 1000-M/E were\r |
| 53 | provided by the optional Fast FORTRAN Processor firmware.\r |
| 54 | \r |
| 55 | The 1000-F substituted a hardware floating point processor for the firmware\r |
| 56 | in previous machines. In addition to the two- and three-word formats, the\r |
| 57 | F-Series introduced a four-word double-precision format. A five-word format\r |
| 58 | that provided extra range in the exponent by unpacking it from the mantissa\r |
| 59 | was also provided, although this capability was not documented in the user\r |
| 60 | manual. In addition, the FPP improved the accuracy of floating-point\r |
| 61 | calculations, as the firmware versions sacrificed a few bits of precision to\r |
| 62 | gain speed. Consequently, operations on the F-Series may return results that\r |
| 63 | differ slightly from the same operations on the M/E-Series or the 2100.\r |
| 64 | \r |
| 65 | F-Series units after date code 1920 also provided two-word double-integer\r |
| 66 | instructions in firmware, as well as double-integer fix and float operations.\r |
| 67 | \r |
| 68 | The original 32-bit floating-point format is as follows:\r |
| 69 | \r |
| 70 | 15 14 0\r |
| 71 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 72 | |MS| mantissa high | : M\r |
| 73 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 74 | | mantissa low | exponent |XS| : M + 1\r |
| 75 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 76 | 15 8 7 1 0\r |
| 77 | \r |
| 78 | Both 23-bit mantissa and 7-bit exponent are in twos-complement form. The\r |
| 79 | exponent sign bit has been rotated into the LSB of the second word.\r |
| 80 | \r |
| 81 | The extended-precision floating-point format is a 48-bit extension of the\r |
| 82 | 32-bit format used for single precision. A packed extended-precision value\r |
| 83 | consists of a 39-bit mantissa and a 7-bit exponent. The format is as\r |
| 84 | follows:\r |
| 85 | \r |
| 86 | 15 14 0\r |
| 87 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 88 | |MS| mantissa high | : M\r |
| 89 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 90 | | mantissa middle | : M + 1\r |
| 91 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 92 | | mantissa low | exponent |XS| : M + 2\r |
| 93 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 94 | 15 8 7 1 0\r |
| 95 | \r |
| 96 | The double-precision floating-point format is similar to the 48-bit\r |
| 97 | extended-precision format, although with a 55-bit mantissa:\r |
| 98 | \r |
| 99 | 15 14 0\r |
| 100 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 101 | |MS| mantissa high | : M\r |
| 102 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 103 | | mantissa middle high | : M + 1\r |
| 104 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 105 | | mantissa middle low | : M + 2\r |
| 106 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 107 | | mantissa low | exponent |XS| : M + 3\r |
| 108 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 109 | 15 8 7 1 0\r |
| 110 | \r |
| 111 | The FPP also supports a special five-word expanded-exponent format:\r |
| 112 | \r |
| 113 | 15 14 0\r |
| 114 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 115 | |MS| mantissa high | : M\r |
| 116 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 117 | | mantissa middle high | : M + 1\r |
| 118 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 119 | | mantissa middle low | : M + 2\r |
| 120 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 121 | | mantissa low | : M + 3\r |
| 122 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 123 | | exponent |XS| : M + 4\r |
| 124 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r |
| 125 | 15 8 7 1 0\r |
| 126 | \r |
| 127 | The exponent is a full 16-bit twos-complement value, but the allowed range is\r |
| 128 | only 10 bits, i.e., -512 to +511.\r |
| 129 | \r |
| 130 | In a normalized value, the sign and MSB of the mantissa differ. Zero is\r |
| 131 | represented by all words = 0.\r |
| 132 | \r |
| 133 | Internally, unpacked floating-point values are contained in a structure\r |
| 134 | having a signed 64-bit mantissa and a signed 32-bit exponent. Mantissas are\r |
| 135 | left-justified with the unused bits masked to zero. Exponents are\r |
| 136 | right-justified. The precision is indicated by the value of a structure\r |
| 137 | field.\r |
| 138 | \r |
| 139 | HP terminology for the three-word floating-point format is confused. Some\r |
| 140 | documents refer to it as "double precision," while others use "extended\r |
| 141 | precision." The instruction mnemonics begin with "X" (e.g., .XADD),\r |
| 142 | suggesting the extended-precision term.\r |
| 143 | \r |
| 144 | HP apparently intended that the four-word double-precision format would be\r |
| 145 | called "triple-precision," as the instruction mnemonics begin with "T" (e.g.,\r |
| 146 | ".TADD" for the four-word add instruction). The source files for the\r |
| 147 | software simulations of these instructions for the M/E-Series also explicitly\r |
| 148 | refer to "triple precision math." However, the engineering documentation and\r |
| 149 | the F-Series reference manual both use the double-precision term.\r |
| 150 | \r |
| 151 | This module adopts the single/extended/double terminology and uses the\r |
| 152 | initial letters of the instructions (F/X/T) to indicate the precision used.\r |
| 153 | \r |
| 154 | The FPP hardware consisted of two circuit boards that interfaced to the main\r |
| 155 | CPU via the Microprogammable Processor Port (MPP) that had been introduced\r |
| 156 | with the 1000 E-Series. One board contained argument registers and ALUs,\r |
| 157 | split into separate mantissa and exponent parts. The other contained a state\r |
| 158 | machine sequencer. FPP results were copied automatically to the argument\r |
| 159 | registers in addition to being available over the MPP, so that chained\r |
| 160 | operations could be executed from these "accumulators" without reloading.\r |
| 161 | \r |
| 162 | The FPP operated independently of the CPU. An opcode, specifying one of the\r |
| 163 | six operations (add, subtract, multiply, divide, fix, or float) was sent to\r |
| 164 | the FPP, and a start command was given. Operands of appropriate precision\r |
| 165 | were then supplied to the FPP. Once the operands were received, the FPP\r |
| 166 | would execute and set a flag when the operation was complete. The result\r |
| 167 | would then be retrieved from the FPP. The floating-point instruction\r |
| 168 | firmware in the CPU initiated the desired FPP operation and handled operand\r |
| 169 | reads from and result writes to main memory.\r |
| 170 | \r |
| 171 | Under simulation, "fp_exec" provides the six arithmetic operations analogous\r |
| 172 | to FPP execution. The remainder of the functions are helpers that were\r |
| 173 | provided by firmware in the 1000-F but that can reuse code needed to simulate\r |
| 174 | the FPP hardware. As with the hardware, "fp_exec" retains the last result\r |
| 175 | in an internal accumulator that may be referenced in subsequent operations.\r |
| 176 | \r |
| 177 | NOTE: this module also provides the floating-point support for the firmware\r |
| 178 | single-precision 1000-M/E base set and extended-precision FFP instructions.\r |
| 179 | Because the firmware and hardware implementations returned slightly different\r |
| 180 | results, particularly with respect to round-off, conditional checks are\r |
| 181 | implemented in the arithmetic routines. In some cases, entirely different\r |
| 182 | algorithms are used to ensure fidelity with the real machines. Functionally,\r |
| 183 | this means that the 2100/1000-M/E and 1000-F floating-point diagnostics are\r |
| 184 | not interchangeable, and failures are to be expected if a diagnostic is run\r |
| 185 | on the wrong machine.\r |
| 186 | */\r |
| 187 | \r |
| 188 | #include "hp2100_defs.h"\r |
| 189 | #include "hp2100_cpu.h"\r |
| 190 | #include "hp2100_cpu1.h"\r |
| 191 | #include "hp2100_fp1.h"\r |
| 192 | \r |
| 193 | \r |
| 194 | #if defined (HAVE_INT64) /* we need int64 support */\r |
| 195 | \r |
| 196 | /* Field widths. */\r |
| 197 | \r |
| 198 | #define IN_W_SIGN 1\r |
| 199 | #define IN_W_SMAGN 15\r |
| 200 | #define IN_W_DMAGN 31\r |
| 201 | \r |
| 202 | #define FP_W_MSIGN 1\r |
| 203 | #define FP_W_FMANT 23\r |
| 204 | #define FP_W_XMANT 39\r |
| 205 | #define FP_W_TMANT 55\r |
| 206 | #define FP_W_EMANT 55\r |
| 207 | #define FP_W_EXPANDEXP 9\r |
| 208 | #define FP_W_EXP 7\r |
| 209 | #define FP_W_ESIGN 1\r |
| 210 | \r |
| 211 | /* Starting bit numbers. */\r |
| 212 | \r |
| 213 | #define IN_V_SIGN (64 - IN_W_SIGN)\r |
| 214 | #define IN_V_SNUM (64 - IN_W_SIGN - IN_W_SMAGN)\r |
| 215 | #define IN_V_DNUM (64 - IN_W_SIGN - IN_W_DMAGN)\r |
| 216 | \r |
| 217 | #define FP_V_FNUM (64 - FP_W_MSIGN - FP_W_FMANT - FP_W_EXP - FP_W_ESIGN)\r |
| 218 | #define FP_V_XNUM (64 - FP_W_MSIGN - FP_W_XMANT - FP_W_EXP - FP_W_ESIGN)\r |
| 219 | #define FP_V_TNUM (64 - FP_W_MSIGN - FP_W_TMANT - FP_W_EXP - FP_W_ESIGN)\r |
| 220 | #define FP_V_ENUM (64 - FP_W_MSIGN - FP_W_EMANT - FP_W_EXP - FP_W_ESIGN)\r |
| 221 | \r |
| 222 | #define FP_V_MSIGN (64 - FP_W_MSIGN)\r |
| 223 | #define FP_V_FMANT (64 - FP_W_MSIGN - FP_W_FMANT)\r |
| 224 | #define FP_V_XMANT (64 - FP_W_MSIGN - FP_W_XMANT)\r |
| 225 | #define FP_V_TMANT (64 - FP_W_MSIGN - FP_W_TMANT)\r |
| 226 | #define FP_V_EMANT (64 - FP_W_MSIGN - FP_W_EMANT)\r |
| 227 | #define FP_V_EXP 1\r |
| 228 | #define FP_V_ESIGN 0\r |
| 229 | \r |
| 230 | /* Right-aligned field masks. */\r |
| 231 | \r |
| 232 | #define IN_M_SIGN (((t_uint64) 1 << IN_W_SIGN) - 1)\r |
| 233 | #define IN_M_SMAGN (((t_uint64) 1 << IN_W_SMAGN) - 1)\r |
| 234 | #define IN_M_DMAGN (((t_uint64) 1 << IN_W_DMAGN) - 1)\r |
| 235 | \r |
| 236 | #define FP_M_MSIGN (((t_uint64) 1 << FP_W_MSIGN) - 1)\r |
| 237 | #define FP_M_FMANT (((t_uint64) 1 << FP_W_FMANT) - 1)\r |
| 238 | #define FP_M_XMANT (((t_uint64) 1 << FP_W_XMANT) - 1)\r |
| 239 | #define FP_M_TMANT (((t_uint64) 1 << FP_W_TMANT) - 1)\r |
| 240 | #define FP_M_EMANT (((t_uint64) 1 << FP_W_EMANT) - 1)\r |
| 241 | \r |
| 242 | #define FP_M_EXPANDEXP ((1 << FP_W_EXPANDEXP) - 1)\r |
| 243 | #define FP_M_EXP ((1 << FP_W_EXP) - 1)\r |
| 244 | #define FP_M_ESIGN ((1 << FP_W_ESIGN) - 1)\r |
| 245 | \r |
| 246 | /* In-place field masks. */\r |
| 247 | \r |
| 248 | #define IN_SIGN (IN_M_SIGN << IN_V_SIGN)\r |
| 249 | #define IN_SMAGN (IN_M_SMAGN << IN_V_SNUM)\r |
| 250 | #define IN_DMAGN (IN_M_DMAGN << IN_V_DNUM)\r |
| 251 | \r |
| 252 | #define FP_MSIGN (FP_M_MSIGN << FP_V_MSIGN)\r |
| 253 | #define FP_FMANT (FP_M_FMANT << FP_V_FMANT)\r |
| 254 | #define FP_XMANT (FP_M_XMANT << FP_V_XMANT)\r |
| 255 | #define FP_TMANT (FP_M_TMANT << FP_V_TMANT)\r |
| 256 | #define FP_EMANT (FP_M_EMANT << FP_V_EMANT)\r |
| 257 | #define FP_EXP (FP_M_EXP << FP_V_EXP)\r |
| 258 | #define FP_ESIGN (FP_M_ESIGN << FP_V_ESIGN)\r |
| 259 | \r |
| 260 | /* In-place record masks. */\r |
| 261 | \r |
| 262 | #define IN_SSMAGN (IN_SIGN | IN_SMAGN)\r |
| 263 | #define IN_SDMAGN (IN_SIGN | IN_DMAGN)\r |
| 264 | \r |
| 265 | #define FP_SFMANT (FP_MSIGN | FP_FMANT)\r |
| 266 | #define FP_SXMANT (FP_MSIGN | FP_XMANT)\r |
| 267 | #define FP_STMANT (FP_MSIGN | FP_TMANT)\r |
| 268 | #define FP_SEMANT (FP_MSIGN | FP_EMANT)\r |
| 269 | #define FP_SEXP (FP_ESIGN | FP_EXP)\r |
| 270 | \r |
| 271 | /* Minima and maxima. */\r |
| 272 | \r |
| 273 | #define FP_ONEHALF ((t_int64) 1 << (FP_V_MSIGN - 1)) /* mantissa = 0.5 */\r |
| 274 | #define FP_MAXPMANT ((t_int64) FP_EMANT) /* maximum pos mantissa */\r |
| 275 | #define FP_MAXNMANT ((t_int64) FP_MSIGN) /* maximum neg mantissa */\r |
| 276 | #define FP_MAXPEXP (FP_M_EXPANDEXP) /* maximum pos expanded exponent */\r |
| 277 | #define FP_MAXNEXP (-(FP_MAXPEXP + 1)) /* maximum neg expanded exponent */\r |
| 278 | \r |
| 279 | /* Floating-point helpers. */\r |
| 280 | \r |
| 281 | #define DENORM(x) ((((x) ^ (x) << 1) & FP_MSIGN) == 0)\r |
| 282 | \r |
| 283 | #define TO_EXP(e) (int8) ((e >> FP_V_EXP & FP_M_EXP) | \\r |
| 284 | (e & FP_M_ESIGN ? ~FP_M_EXP : 0))\r |
| 285 | \r |
| 286 | /* Property constants. */\r |
| 287 | \r |
| 288 | static const t_int64 p_half_lsb[6] = { ((t_int64) 1 << IN_V_SNUM) - 1, /* different than FP! */\r |
| 289 | ((t_int64) 1 << IN_V_DNUM) - 1, /* different than FP! */\r |
| 290 | (t_int64) 1 << (FP_V_FMANT - 1),\r |
| 291 | (t_int64) 1 << (FP_V_XMANT - 1),\r |
| 292 | (t_int64) 1 << (FP_V_TMANT - 1),\r |
| 293 | (t_int64) 1 << (FP_V_EMANT - 1) };\r |
| 294 | \r |
| 295 | static const t_int64 n_half_lsb[6] = { 0,\r |
| 296 | 0,\r |
| 297 | ((t_int64) 1 << (FP_V_FMANT - 1)) - 1,\r |
| 298 | ((t_int64) 1 << (FP_V_XMANT - 1)) - 1,\r |
| 299 | ((t_int64) 1 << (FP_V_TMANT - 1)) - 1,\r |
| 300 | ((t_int64) 1 << (FP_V_EMANT - 1)) - 1 };\r |
| 301 | \r |
| 302 | static const uint32 op_start[6] = { IN_V_SNUM,\r |
| 303 | IN_V_DNUM,\r |
| 304 | FP_V_FMANT,\r |
| 305 | FP_V_XMANT,\r |
| 306 | FP_V_TMANT,\r |
| 307 | FP_V_EMANT };\r |
| 308 | \r |
| 309 | static const t_int64 mant_mask[6] = { IN_SSMAGN,\r |
| 310 | IN_SDMAGN,\r |
| 311 | FP_SFMANT,\r |
| 312 | FP_SXMANT,\r |
| 313 | FP_STMANT,\r |
| 314 | FP_SEMANT };\r |
| 315 | \r |
| 316 | static const uint32 op_bits[6] = { IN_W_SMAGN,\r |
| 317 | IN_W_DMAGN,\r |
| 318 | FP_W_FMANT + FP_W_MSIGN,\r |
| 319 | FP_W_XMANT + FP_W_MSIGN,\r |
| 320 | FP_W_TMANT + FP_W_MSIGN,\r |
| 321 | FP_W_EMANT + FP_W_MSIGN };\r |
| 322 | \r |
| 323 | static const t_int64 op_mask[6] = { ~(t_int64) 0 << IN_V_SNUM,\r |
| 324 | ~(t_int64) 0 << IN_V_DNUM,\r |
| 325 | ~(t_int64) 0 << FP_V_FNUM,\r |
| 326 | ~(t_int64) 0 << FP_V_XNUM,\r |
| 327 | ~(t_int64) 0 << FP_V_TNUM,\r |
| 328 | ~(t_int64) 0 << FP_V_ENUM };\r |
| 329 | \r |
| 330 | static const uint32 int_p_max[2] = { IN_M_SMAGN,\r |
| 331 | IN_M_DMAGN };\r |
| 332 | \r |
| 333 | \r |
| 334 | /* Internal unpacked floating-point representation. */\r |
| 335 | \r |
| 336 | typedef struct {\r |
| 337 | t_int64 mantissa;\r |
| 338 | int32 exponent;\r |
| 339 | OPSIZE precision;\r |
| 340 | } FPU;\r |
| 341 | \r |
| 342 | \r |
| 343 | \r |
| 344 | /* Low-level helper routines. */\r |
| 345 | \r |
| 346 | \r |
| 347 | /* Arithmetic shift right for mantissa only.\r |
| 348 | \r |
| 349 | Returns TRUE if any one-bits are shifted out (for F-series only).\r |
| 350 | */\r |
| 351 | \r |
| 352 | static t_bool asr (FPU *operand, int32 shift)\r |
| 353 | {\r |
| 354 | t_uint64 mask;\r |
| 355 | t_bool bits_lost;\r |
| 356 | \r |
| 357 | if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */\r |
| 358 | mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */\r |
| 359 | bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */\r |
| 360 | }\r |
| 361 | else\r |
| 362 | bits_lost = FALSE;\r |
| 363 | \r |
| 364 | operand->mantissa = operand->mantissa >> shift; /* mantissa is int, so ASR */\r |
| 365 | return bits_lost;\r |
| 366 | }\r |
| 367 | \r |
| 368 | \r |
| 369 | /* Logical shift right for mantissa and exponent.\r |
| 370 | \r |
| 371 | Shifts mantissa and corrects exponent for mantissa overflow.\r |
| 372 | Returns TRUE if any one-bits are shifted out (for F-series only).\r |
| 373 | */\r |
| 374 | \r |
| 375 | static t_bool lsrx (FPU *operand, int32 shift)\r |
| 376 | {\r |
| 377 | t_uint64 mask;\r |
| 378 | t_bool bits_lost;\r |
| 379 | \r |
| 380 | if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */\r |
| 381 | mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */\r |
| 382 | bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */\r |
| 383 | }\r |
| 384 | else\r |
| 385 | bits_lost = FALSE;\r |
| 386 | \r |
| 387 | operand->mantissa = (t_uint64) operand->mantissa >> shift; /* uint, so LSR */\r |
| 388 | operand->exponent = operand->exponent + shift; /* correct exponent */\r |
| 389 | return bits_lost;\r |
| 390 | }\r |
| 391 | \r |
| 392 | \r |
| 393 | /* Unpack an operand into a long integer.\r |
| 394 | \r |
| 395 | Returns a left-aligned integer or mantissa. Does not mask to precision; this\r |
| 396 | should be done subsequently if desired.\r |
| 397 | */\r |
| 398 | \r |
| 399 | static t_int64 unpack_int (OP packed, OPSIZE precision)\r |
| 400 | {\r |
| 401 | uint32 i;\r |
| 402 | t_uint64 unpacked = 0;\r |
| 403 | \r |
| 404 | if (precision == in_s)\r |
| 405 | unpacked = (t_uint64) packed.word << 48; /* unpack single integer */\r |
| 406 | \r |
| 407 | else if (precision == in_d)\r |
| 408 | unpacked = (t_uint64) packed.dword << 32; /* unpack double integer */\r |
| 409 | \r |
| 410 | else {\r |
| 411 | if (precision == fp_e) /* five word operand? */\r |
| 412 | precision = fp_t; /* only four mantissa words */\r |
| 413 | \r |
| 414 | for (i = 0; i < 4; i++) /* unpack fp 2 to 4 words */\r |
| 415 | if (i < TO_COUNT (precision))\r |
| 416 | unpacked = unpacked << 16 | packed.fpk[i];\r |
| 417 | else\r |
| 418 | unpacked = unpacked << 16;\r |
| 419 | }\r |
| 420 | \r |
| 421 | return (t_int64) unpacked;\r |
| 422 | }\r |
| 423 | \r |
| 424 | \r |
| 425 | /* Unpack a packed operand.\r |
| 426 | \r |
| 427 | The packed value is split into separate mantissa and exponent variables. The\r |
| 428 | multiple words of the mantissa are concatenated into a single 64-bit signed\r |
| 429 | value, and the exponent is shifted with recovery of the sign.\r |
| 430 | */\r |
| 431 | \r |
| 432 | static FPU unpack (OP packed, OPSIZE precision)\r |
| 433 | {\r |
| 434 | FPU unpacked;\r |
| 435 | \r |
| 436 | unpacked.precision = precision; /* set value's precision */\r |
| 437 | \r |
| 438 | unpacked.mantissa = /* unpack and mask mantissa */\r |
| 439 | unpack_int (packed, precision) & mant_mask[precision];\r |
| 440 | \r |
| 441 | switch (precision) {\r |
| 442 | \r |
| 443 | case fp_f:\r |
| 444 | case fp_x:\r |
| 445 | case fp_t:\r |
| 446 | unpacked.exponent = /* unpack exponent from correct word */\r |
| 447 | TO_EXP (packed.fpk[(uint32) precision - 1]);\r |
| 448 | break;\r |
| 449 | \r |
| 450 | case fp_e:\r |
| 451 | unpacked.exponent = /* unpack expanded exponent */\r |
| 452 | (int16) (packed.fpk[4] >> FP_V_EXP | /* rotate sign into place */\r |
| 453 | (packed.fpk[4] & 1 ? SIGN : 0));\r |
| 454 | break;\r |
| 455 | \r |
| 456 | case fp_a: /* no action for value in accum */\r |
| 457 | case in_s: /* integers don't use exponent */\r |
| 458 | case in_d: /* integers don't use exponent */\r |
| 459 | default:\r |
| 460 | unpacked.exponent = 0;\r |
| 461 | break;\r |
| 462 | }\r |
| 463 | \r |
| 464 | return unpacked;\r |
| 465 | }\r |
| 466 | \r |
| 467 | \r |
| 468 | /* Pack a long integer into an operand. */\r |
| 469 | \r |
| 470 | static OP pack_int (t_int64 unpacked, OPSIZE precision)\r |
| 471 | {\r |
| 472 | int32 i;\r |
| 473 | OP packed;\r |
| 474 | \r |
| 475 | if (precision == in_s)\r |
| 476 | packed.word = (uint16) (unpacked >> 48) & DMASK; /* pack single integer */\r |
| 477 | \r |
| 478 | else if (precision == in_d)\r |
| 479 | packed.dword = (uint32) (unpacked >> 32) & DMASK32; /* pack double integer */\r |
| 480 | \r |
| 481 | else {\r |
| 482 | if (precision == fp_e) /* five word operand? */\r |
| 483 | precision = fp_t; /* only four mantissa words */\r |
| 484 | \r |
| 485 | for (i = 3; i >= 0; i--) { /* pack fp 2 to 4 words */\r |
| 486 | packed.fpk[i] = (uint16) unpacked & DMASK;\r |
| 487 | unpacked = unpacked >> 16;\r |
| 488 | }\r |
| 489 | }\r |
| 490 | \r |
| 491 | return packed;\r |
| 492 | }\r |
| 493 | \r |
| 494 | \r |
| 495 | /* Pack an unpacked floating-point number.\r |
| 496 | \r |
| 497 | The 64-bit mantissa is split into the appropriate number of 16-bit words.\r |
| 498 | The exponent is rotated to incorporate the sign bit and merged into the\r |
| 499 | appropriate word.\r |
| 500 | */\r |
| 501 | \r |
| 502 | static OP pack (FPU unpacked)\r |
| 503 | {\r |
| 504 | OP packed;\r |
| 505 | uint8 exp;\r |
| 506 | \r |
| 507 | packed = pack_int (unpacked.mantissa, unpacked.precision); /* pack mantissa */\r |
| 508 | \r |
| 509 | exp = ((uint8) unpacked.exponent << FP_V_EXP) | /* rotate exponent */\r |
| 510 | ((unpacked.exponent < 0) << FP_V_ESIGN);\r |
| 511 | \r |
| 512 | switch (unpacked.precision) { /* merge exponent into correct word */\r |
| 513 | \r |
| 514 | case in_s: /* no action for integers */\r |
| 515 | case in_d:\r |
| 516 | break;\r |
| 517 | \r |
| 518 | case fp_f: /* merge into last word */\r |
| 519 | case fp_x:\r |
| 520 | case fp_t:\r |
| 521 | packed.fpk[(uint32) unpacked.precision - 1] =\r |
| 522 | (packed.fpk[(uint32) unpacked.precision - 1] & ~FP_SEXP) | exp;\r |
| 523 | break;\r |
| 524 | \r |
| 525 | case fp_e: /* place in separate word */\r |
| 526 | packed.fpk[4] = ((uint16) unpacked.exponent << FP_V_EXP) |\r |
| 527 | ((unpacked.exponent < 0) << FP_V_ESIGN);\r |
| 528 | break;\r |
| 529 | \r |
| 530 | case fp_a: /* no action for value in accum */\r |
| 531 | break;\r |
| 532 | }\r |
| 533 | \r |
| 534 | return packed;\r |
| 535 | }\r |
| 536 | \r |
| 537 | \r |
| 538 | /* Normalize an unpacked floating-point number.\r |
| 539 | \r |
| 540 | Floating-point numbers are in normal form if the sign bit and the MSB of the\r |
| 541 | mantissa differ. Unnormalized numbers are shifted as needed with appropriate\r |
| 542 | exponent modification.\r |
| 543 | */\r |
| 544 | \r |
| 545 | static void normalize (FPU *unpacked)\r |
| 546 | {\r |
| 547 | \r |
| 548 | if (unpacked->mantissa) /* non-zero? */\r |
| 549 | while (DENORM (unpacked->mantissa)) { /* normal form? */\r |
| 550 | unpacked->exponent = unpacked->exponent - 1; /* no, so left shift */\r |
| 551 | unpacked->mantissa = unpacked->mantissa << 1;\r |
| 552 | }\r |
| 553 | else\r |
| 554 | unpacked->exponent = 0; /* clean for zero */\r |
| 555 | return;\r |
| 556 | }\r |
| 557 | \r |
| 558 | \r |
| 559 | /* Round an unpacked floating-point number and check for overflow.\r |
| 560 | \r |
| 561 | An unpacked floating-point number is rounded by adding one-half of the LSB\r |
| 562 | value, maintaining symmetry around zero. If rounding resulted in a mantissa\r |
| 563 | overflow, the result logically is shifted to the right with an appropriate\r |
| 564 | exponent modification. Finally, the result is checked for exponent underflow\r |
| 565 | or overflow, and the appropriate approximation (zero or infinity) is\r |
| 566 | returned.\r |
| 567 | \r |
| 568 | Rounding in hardware involves a special mantissa extension register that\r |
| 569 | holds three "guard" bits and one "sticky" bit. These represent the value of\r |
| 570 | bits right-shifted out the mantissa register. Under simulation, we track\r |
| 571 | such right-shifts and utilize the lower eight bits of the 64-bit mantissa\r |
| 572 | value to simulate the extension register.\r |
| 573 | \r |
| 574 | Overflow depends on whether the FPP expanded-exponent form is being used\r |
| 575 | (this expands the exponent range by two bits). If overflow is detected, the\r |
| 576 | value representing infinity is dependent on whether the operation is on\r |
| 577 | behalf of the Fast FORTRAN Processor. The F-Series FPP returns positive\r |
| 578 | infinity on both positive and negative overflow for all precisions. The 2100\r |
| 579 | and M/E-Series FFPs return negative infinity on negative overflow of\r |
| 580 | extended-precision values. Single-precision overflows on these machines\r |
| 581 | always return positive infinity.\r |
| 582 | \r |
| 583 | The number to be rounded must be normalized upon entry.\r |
| 584 | */\r |
| 585 | \r |
| 586 | static uint32 roundovf (FPU *unpacked, t_bool expand)\r |
| 587 | {\r |
| 588 | uint32 overflow;\r |
| 589 | t_bool sign;\r |
| 590 | \r |
| 591 | sign = (unpacked->mantissa < 0); /* save mantissa sign */\r |
| 592 | \r |
| 593 | if (sign) /* round and mask the number */\r |
| 594 | unpacked->mantissa =\r |
| 595 | (unpacked->mantissa + n_half_lsb[unpacked->precision]) &\r |
| 596 | mant_mask[unpacked->precision];\r |
| 597 | else\r |
| 598 | unpacked->mantissa =\r |
| 599 | (unpacked->mantissa + p_half_lsb[unpacked->precision]) &\r |
| 600 | mant_mask[unpacked->precision];\r |
| 601 | \r |
| 602 | if (sign != (unpacked->mantissa < 0)) /* mantissa overflow? */\r |
| 603 | lsrx (unpacked, 1); /* correct by shifting */\r |
| 604 | else\r |
| 605 | normalize (unpacked); /* renorm may be needed */\r |
| 606 | \r |
| 607 | if (unpacked->mantissa == 0) { /* result zero? */\r |
| 608 | unpacked->mantissa = 0; /* return zero */\r |
| 609 | unpacked->exponent = 0;\r |
| 610 | overflow = 0; /* with overflow clear */\r |
| 611 | }\r |
| 612 | else if (unpacked->exponent < /* result underflow? */\r |
| 613 | (FP_MAXNEXP >> (expand ? 0 : 2))) {\r |
| 614 | unpacked->mantissa = 0; /* return zero */\r |
| 615 | unpacked->exponent = 0;\r |
| 616 | overflow = 1; /* and set overflow */\r |
| 617 | }\r |
| 618 | else if (unpacked->exponent > /* result overflow? */\r |
| 619 | (FP_MAXPEXP >> (expand ? 0 : 2))) {\r |
| 620 | if (sign && /* negative value? */\r |
| 621 | (unpacked->precision == fp_x) && /* extended precision? */\r |
| 622 | (UNIT_CPU_MODEL != UNIT_1000_F)) { /* not F-series? */\r |
| 623 | unpacked->mantissa = FP_MAXNMANT; /* return negative infinity */\r |
| 624 | unpacked->exponent = FP_MAXPEXP & FP_M_EXP;\r |
| 625 | }\r |
| 626 | else {\r |
| 627 | unpacked->mantissa = FP_MAXPMANT; /* return positive infinity */\r |
| 628 | unpacked->exponent = FP_MAXPEXP & FP_M_EXP;\r |
| 629 | }\r |
| 630 | overflow = 1; /* and set overflow */\r |
| 631 | }\r |
| 632 | else\r |
| 633 | overflow = 0; /* value is in range */\r |
| 634 | \r |
| 635 | return overflow;\r |
| 636 | }\r |
| 637 | \r |
| 638 | \r |
| 639 | /* Normalize, round, and pack an unpacked floating-point number. */\r |
| 640 | \r |
| 641 | static uint32 nrpack (OP *packed, FPU unpacked, t_bool expand)\r |
| 642 | {\r |
| 643 | uint32 overflow;\r |
| 644 | \r |
| 645 | normalize (&unpacked); /* normalize for rounding */\r |
| 646 | overflow = roundovf (&unpacked, expand); /* round and check for overflow */\r |
| 647 | *packed = pack (unpacked); /* pack result */\r |
| 648 | \r |
| 649 | return overflow;\r |
| 650 | }\r |
| 651 | \r |
| 652 | \r |
| 653 | \r |
| 654 | /* Low-level arithmetic routines. */\r |
| 655 | \r |
| 656 | \r |
| 657 | /* Complement an unpacked number. */\r |
| 658 | \r |
| 659 | static void complement (FPU *result)\r |
| 660 | {\r |
| 661 | if (result->mantissa == FP_MAXNMANT) { /* maximum negative? */\r |
| 662 | result->mantissa = FP_ONEHALF; /* complement of -1.0 * 2 ^ n */\r |
| 663 | result->exponent = result->exponent + 1; /* is 0.5 * 2 ^ (n + 1) */\r |
| 664 | }\r |
| 665 | else\r |
| 666 | result->mantissa = -result->mantissa; /* negate mantissa */\r |
| 667 | return;\r |
| 668 | }\r |
| 669 | \r |
| 670 | \r |
| 671 | /* Add two unpacked numbers.\r |
| 672 | \r |
| 673 | The mantissas are first aligned if necessary by scaling the smaller of the\r |
| 674 | two operands. If the magnitude of the difference between the exponents is\r |
| 675 | greater than the number of significant bits, then the smaller number has been\r |
| 676 | scaled to zero (swamped), and so the sum is simply the larger operand.\r |
| 677 | Otherwise, the sum is computed and checked for overflow, which has occured if\r |
| 678 | the signs of the operands are the same but differ from that of the result.\r |
| 679 | Scaling and renormalization is perfomed if overflow occurred.\r |
| 680 | */\r |
| 681 | \r |
| 682 | static void add (FPU *sum, FPU augend, FPU addend)\r |
| 683 | {\r |
| 684 | int32 magn;\r |
| 685 | t_bool bits_lost;\r |
| 686 | \r |
| 687 | if (augend.mantissa == 0)\r |
| 688 | *sum = addend; /* X + 0 = X */\r |
| 689 | \r |
| 690 | else if (addend.mantissa == 0)\r |
| 691 | *sum = augend; /* 0 + X = X */\r |
| 692 | \r |
| 693 | else {\r |
| 694 | magn = augend.exponent - addend.exponent; /* difference exponents */\r |
| 695 | \r |
| 696 | if (magn > 0) { /* addend smaller? */\r |
| 697 | *sum = augend; /* preset augend */\r |
| 698 | bits_lost = asr (&addend, magn); /* align addend */\r |
| 699 | }\r |
| 700 | else { /* augend smaller? */\r |
| 701 | *sum = addend; /* preset addend */\r |
| 702 | magn = -magn; /* make difference positive */\r |
| 703 | bits_lost = asr (&augend, magn); /* align augend */\r |
| 704 | }\r |
| 705 | \r |
| 706 | if (magn <= (int32) op_bits[augend.precision]) { /* value swamped? */\r |
| 707 | sum->mantissa = /* no, add mantissas */\r |
| 708 | addend.mantissa + augend.mantissa;\r |
| 709 | \r |
| 710 | if (((addend.mantissa < 0) == (augend.mantissa < 0)) && /* mantissa overflow? */\r |
| 711 | ((addend.mantissa < 0) != (sum->mantissa < 0))) {\r |
| 712 | bits_lost = bits_lost | lsrx (sum, 1); /* restore value */\r |
| 713 | sum->mantissa = /* restore sign */\r |
| 714 | sum-> mantissa | (addend.mantissa & FP_MSIGN);\r |
| 715 | }\r |
| 716 | \r |
| 717 | if (bits_lost) /* any bits lost? */\r |
| 718 | sum->mantissa = sum->mantissa | 1; /* include one for rounding */\r |
| 719 | }\r |
| 720 | }\r |
| 721 | return;\r |
| 722 | }\r |
| 723 | \r |
| 724 | \r |
| 725 | /* Multiply two unpacked numbers.\r |
| 726 | \r |
| 727 | The single-precision firmware (FMP) operates differently from the firmware\r |
| 728 | extended-precision (.XMPY) and the hardware multiplies of any precision.\r |
| 729 | Firmware implementations form 16-bit x 16-bit = 32-bit partial products and\r |
| 730 | sum them to form the result. The hardware uses a series of shifts and adds.\r |
| 731 | This means that firmware FMP and hardware FMP return slightly different\r |
| 732 | values, as may be seen by attempting to run the firmware FMP diagnostic on\r |
| 733 | the FPP.\r |
| 734 | \r |
| 735 | The FMP microcode calls a signed multiply routine to calculate three partial\r |
| 736 | products (all but LSB * LSB). Because the LSBs are unsigned, i.e., all bits\r |
| 737 | significant, the two MSB * LSB products are calculated using LSB/2. The\r |
| 738 | unsigned right-shift ensures a positive LSB with no significant bits lost,\r |
| 739 | because the lower eight bits are unused (they held the vacated exponent). In\r |
| 740 | order to sum the partial products, the LSB of the result of MSB * MSB is also\r |
| 741 | right-shifted before addition. Note, though, that this loses a significant\r |
| 742 | bit. After summation, the result is left-shifted to correct for the original\r |
| 743 | right shifts.\r |
| 744 | \r |
| 745 | The .XMPY microcode negates both operands as necessary to produce positive\r |
| 746 | values and then forms six of the nine 16-bit x 16-bit = 32-bit unsigned\r |
| 747 | multiplications required for a full 96-bit product. Given a 48-bit\r |
| 748 | multiplicand "a1a2a3" and a 48-bit multiplier "b1b2b3", the firmware performs\r |
| 749 | these calculations to develop a 48-bit product:\r |
| 750 | \r |
| 751 | a1 a2 a3\r |
| 752 | +-------+-------+-------+\r |
| 753 | b1 b2 b3\r |
| 754 | +-------+-------+-------+\r |
| 755 | _________________________\r |
| 756 | \r |
| 757 | a1 * b3 [p1]\r |
| 758 | +-------+-------+\r |
| 759 | a2 * b2 [p2]\r |
| 760 | +-------+-------+\r |
| 761 | a1 * b2 [p3]\r |
| 762 | +-------+-------+\r |
| 763 | a3 * b1 [p4]\r |
| 764 | +-------+-------+\r |
| 765 | a2 * b1 [p5]\r |
| 766 | +-------+-------+\r |
| 767 | a1 * b1 [p6]\r |
| 768 | +-------+-------+\r |
| 769 | _________________________________\r |
| 770 | \r |
| 771 | product\r |
| 772 | +-------+-------+-------+\r |
| 773 | \r |
| 774 | The least-significant words of partial products [p1], [p2], and [p4] are used\r |
| 775 | only to develop a carry bit into the 48-bit sum. The product is complemented\r |
| 776 | as necessary to restore the sign.\r |
| 777 | \r |
| 778 | The basic FPP hardware algorithm scans the multiplier and adds a shifted copy\r |
| 779 | of the multiplicand whenever a one-bit is detected. To avoid successive adds\r |
| 780 | when a string of ones is encountered (because adds are more expensive than\r |
| 781 | shifts), the hardware instead adds the multiplicand shifted by N+1+P and\r |
| 782 | subtracts the multiplicand shifted by P to obtain the equivalent value with a\r |
| 783 | maximum of two operations.\r |
| 784 | \r |
| 785 | Instead of implementing either the .XMPY firmware algorithm or the hardware\r |
| 786 | shift-and-add algorithm directly, it is more efficient under simulation to\r |
| 787 | use 32 x 32 = 64-bit multiplications, thereby reducing the number required\r |
| 788 | from six to four (64-bit "c1c2" x 64-bit "d1d2"):\r |
| 789 | \r |
| 790 | ah al\r |
| 791 | +-------+-------+\r |
| 792 | bh bl\r |
| 793 | +-------+-------+\r |
| 794 | _________________\r |
| 795 | \r |
| 796 | al * bl [ll]\r |
| 797 | +-------+-------+\r |
| 798 | ah * bl [hl]\r |
| 799 | +-------+-------+\r |
| 800 | al * bh [lh]\r |
| 801 | +-------+-------+\r |
| 802 | ah * bh [hh]\r |
| 803 | +-------+-------+\r |
| 804 | _________________________________\r |
| 805 | \r |
| 806 | product\r |
| 807 | +-------+-------+\r |
| 808 | \r |
| 809 | However, the FMP algorithm is implemented directly from the microcode to\r |
| 810 | preserve the fidelity of the simulation, i.e., to lose the same amount\r |
| 811 | of precision.\r |
| 812 | */\r |
| 813 | \r |
| 814 | static void multiply (FPU *product, FPU multiplicand, FPU multiplier)\r |
| 815 | {\r |
| 816 | uint32 ah, al, bh, bl, sign = 0;\r |
| 817 | t_uint64 hh, hl, lh, ll, carry;\r |
| 818 | int16 ch, cl, dh, dl;\r |
| 819 | t_bool firmware;\r |
| 820 | \r |
| 821 | product->precision = multiplicand.precision; /* set precision */\r |
| 822 | \r |
| 823 | if ((multiplicand.mantissa == 0) || /* 0 * X = 0 */\r |
| 824 | (multiplier.mantissa == 0)) /* X * 0 = 0 */\r |
| 825 | product->mantissa = product->exponent = 0;\r |
| 826 | \r |
| 827 | else {\r |
| 828 | firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */\r |
| 829 | \r |
| 830 | if (!firmware || (product->precision != fp_f)) { /* hardware? */\r |
| 831 | if (multiplicand.mantissa < 0) { /* negative? */\r |
| 832 | complement (&multiplicand); /* complement operand */\r |
| 833 | sign = ~sign; /* track sign */\r |
| 834 | }\r |
| 835 | if (multiplier.mantissa < 0) { /* negative? */\r |
| 836 | complement (&multiplier); /* complement operand */\r |
| 837 | sign = ~sign; /* track sign */\r |
| 838 | }\r |
| 839 | }\r |
| 840 | \r |
| 841 | product->exponent = /* compute exponent */\r |
| 842 | multiplicand.exponent + multiplier.exponent + 1;\r |
| 843 | \r |
| 844 | ah = (uint32) (multiplicand.mantissa >> 32); /* split multiplicand */\r |
| 845 | al = (uint32) (multiplicand.mantissa & DMASK32); /* into high and low parts */\r |
| 846 | bh = (uint32) (multiplier.mantissa >> 32); /* split multiplier */\r |
| 847 | bl = (uint32) (multiplier.mantissa & DMASK32); /* into high and low parts */\r |
| 848 | \r |
| 849 | if (firmware && (product->precision == fp_f)) { /* single-precision firmware? */\r |
| 850 | ch = (int16) (ah >> 16) & DMASK; /* split 32-bit multiplicand */\r |
| 851 | cl = (int16) (ah & 0xfffe); /* into high and low parts */\r |
| 852 | dh = (int16) (bh >> 16) & DMASK; /* split 32-bit multiplier */\r |
| 853 | dl = (int16) (bh & 0xfffe); /* into high and low parts */\r |
| 854 | \r |
| 855 | hh = (t_uint64) (((int32) ch * dh) & ~1); /* form cross products */\r |
| 856 | hl = (t_uint64) (((t_int64) ch * (t_int64) (uint16) dl +\r |
| 857 | (t_int64) dh * (t_int64) (uint16) cl) &\r |
| 858 | 0xfffffffffffe0000);\r |
| 859 | \r |
| 860 | product->mantissa = (t_uint64) (((t_int64) hh << 32) + /* sum partials */\r |
| 861 | ((t_int64) hl << 16));\r |
| 862 | }\r |
| 863 | \r |
| 864 | else {\r |
| 865 | hh = ((t_uint64) ah * bh); /* form four cross products */\r |
| 866 | hl = ((t_uint64) ah * bl); /* using 32 x 32 = */\r |
| 867 | lh = ((t_uint64) al * bh); /* 64-bit multiplies */\r |
| 868 | ll = ((t_uint64) al * bl);\r |
| 869 | \r |
| 870 | carry = ((ll >> 32) + (uint32) hl + (uint32) lh) >> 32; /* form carry */\r |
| 871 | \r |
| 872 | product->mantissa = hh + (hl >> 32) + (lh >> 32) + carry; /* sum partials */\r |
| 873 | \r |
| 874 | if (sign) /* negate if required */\r |
| 875 | complement (product);\r |
| 876 | }\r |
| 877 | }\r |
| 878 | return;\r |
| 879 | }\r |
| 880 | \r |
| 881 | \r |
| 882 | /* Divide two unpacked numbers.\r |
| 883 | \r |
| 884 | As with multiply, the single-precision firmware (FDV) operates differently\r |
| 885 | from the firmware extended-precision (.XDIV) and the hardware divisions of\r |
| 886 | any precision. Firmware implementations utilize a "divide and correct"\r |
| 887 | algorithm, wherein the quotient is estimated and then corrected by comparing\r |
| 888 | the dividend to the product of the quotient and the divisor. The hardware\r |
| 889 | uses a series of shifts and subtracts. This means that firmware FDV and\r |
| 890 | hardware FDV once again return slightly different values.\r |
| 891 | \r |
| 892 | Under simulation, the classic divide-and-correct method is employed, using\r |
| 893 | 64-bit / 32-bit = 32-bit divisions. This involves dividing the 64-bit\r |
| 894 | dividend "a1a2a3a4" by the first 32-bit digit "b1b2" of the 64-bit divisor\r |
| 895 | "b1b2b3b4". The resulting 32-bit quotient is ...\r |
| 896 | \r |
| 897 | The microcoded single-precision division avoids overflows by right-shifting\r |
| 898 | some values, which leads to a loss of precision in the LSBs. We duplicate\r |
| 899 | the firmware algorithm here to preserve the fidelity of the simulation.\r |
| 900 | */\r |
| 901 | \r |
| 902 | static void divide (FPU *quotient, FPU dividend, FPU divisor)\r |
| 903 | {\r |
| 904 | uint32 sign = 0;\r |
| 905 | t_int64 bh, bl, r1, r0, p1, p0;\r |
| 906 | t_uint64 q, q1, q0;\r |
| 907 | t_bool firmware;\r |
| 908 | int32 ah, div, cp;\r |
| 909 | int16 dh, dl, pq1, pq2, cq;\r |
| 910 | \r |
| 911 | quotient->precision = dividend.precision; /* set precision */\r |
| 912 | \r |
| 913 | if (divisor.mantissa == 0) { /* division by zero? */\r |
| 914 | if (dividend.mantissa < 0)\r |
| 915 | quotient->mantissa = FP_MSIGN; /* return minus infinity */\r |
| 916 | else\r |
| 917 | quotient->mantissa = ~FP_MSIGN; /* or plus infinity */\r |
| 918 | quotient->exponent = FP_MAXPEXP + 1;\r |
| 919 | }\r |
| 920 | \r |
| 921 | else if (dividend.mantissa == 0) /* dividend zero? */\r |
| 922 | quotient->mantissa = quotient->exponent = 0; /* yes; result is zero */\r |
| 923 | \r |
| 924 | else {\r |
| 925 | firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */\r |
| 926 | \r |
| 927 | if (!firmware || (quotient->precision != fp_f)) { /* hardware or FFP? */\r |
| 928 | if (dividend.mantissa < 0) { /* negative? */\r |
| 929 | complement (÷nd); /* complement operand */\r |
| 930 | sign = ~sign; /* track sign */\r |
| 931 | }\r |
| 932 | if (divisor.mantissa < 0) { /* negative? */\r |
| 933 | complement (&divisor); /* complement operand */\r |
| 934 | sign = ~sign; /* track sign */\r |
| 935 | }\r |
| 936 | }\r |
| 937 | \r |
| 938 | quotient->exponent = /* division subtracts exponents */\r |
| 939 | dividend.exponent - divisor.exponent;\r |
| 940 | \r |
| 941 | bh = divisor.mantissa >> 32; /* split divisor */\r |
| 942 | bl = divisor.mantissa & DMASK32; /* into high and low parts */\r |
| 943 | \r |
| 944 | if (firmware && (quotient->precision == fp_f)) { /* single-precision firmware? */\r |
| 945 | quotient->exponent = quotient->exponent + 1; /* fix exponent */\r |
| 946 | \r |
| 947 | ah = (int32) (dividend.mantissa >> 32); /* split dividend */\r |
| 948 | dh = (int16) (bh >> 16); /* split divisor again */\r |
| 949 | dl = (int16) bh;\r |
| 950 | \r |
| 951 | div = ah >> 2; /* ASR 2 to prevent overflow */\r |
| 952 | \r |
| 953 | pq1 = div / dh; /* form first partial quotient */\r |
| 954 | div = ((div % dh) & ~1) << 15; /* ASR 1, move rem to upper */\r |
| 955 | pq2 = div / dh; /* form second partial quotient */\r |
| 956 | \r |
| 957 | div = (uint16) dl << 13; /* move divisor LSB to upper, LSR 3 */\r |
| 958 | cq = div / dh; /* form correction quotient */\r |
| 959 | cp = -cq * pq1; /* and correction product */\r |
| 960 | \r |
| 961 | cp = (((cp >> 14) & ~3) + (int32) pq2) << 1; /* add corr prod and 2nd partial quo */\r |
| 962 | quotient->mantissa = /* add 1st partial quo and align */\r |
| 963 | (t_uint64) (((int32) pq1 << 16) + cp) << 32;\r |
| 964 | }\r |
| 965 | \r |
| 966 | else { /* hardware or FFP */\r |
| 967 | q1 = (t_uint64) (dividend.mantissa / bh); /* form 1st trial quotient */\r |
| 968 | r1 = dividend.mantissa % bh; /* and remainder */\r |
| 969 | p1 = (r1 << 24) - (bl >> 8) * q1; /* calculate correction */\r |
| 970 | \r |
| 971 | while (p1 < 0) { /* correction needed? */\r |
| 972 | q1 = q1 - 1; /* trial quotient too large */\r |
| 973 | p1 = p1 + (divisor.mantissa >> 8); /* increase remainder */\r |
| 974 | }\r |
| 975 | \r |
| 976 | q0 = (t_uint64) ((p1 << 8) / bh); /* form 2nd trial quotient */\r |
| 977 | r0 = (p1 << 8) % bh; /* and remainder */\r |
| 978 | p0 = (r0 << 24) - (bl >> 8) * q0; /* calculate correction */\r |
| 979 | \r |
| 980 | while (p0 < 0) { /* correction needed? */\r |
| 981 | q0 = q0 - 1; /* trial quotient too large */\r |
| 982 | p0 = p0 + (divisor.mantissa >> 8); /* increase remainder */\r |
| 983 | }\r |
| 984 | \r |
| 985 | q = (q1 << 32) + q0; /* sum quotient digits */\r |
| 986 | \r |
| 987 | if (q1 & 0xffffffff00000000) { /* did we lose MSB? */\r |
| 988 | q = (q >> 1) | 0x8000000000000000; /* shift right and replace bit */\r |
| 989 | quotient->exponent = quotient->exponent + 1;/* bump exponent for shift */\r |
| 990 | }\r |
| 991 | \r |
| 992 | if (q & 0x8000000000000000) /* lose normalization? */\r |
| 993 | q = q >> 1; /* correct */\r |
| 994 | \r |
| 995 | quotient->mantissa = (t_int64) q;\r |
| 996 | }\r |
| 997 | \r |
| 998 | if (sign)\r |
| 999 | complement (quotient); /* negate if required */\r |
| 1000 | }\r |
| 1001 | return;\r |
| 1002 | }\r |
| 1003 | \r |
| 1004 | \r |
| 1005 | /* Fix an unpacked number.\r |
| 1006 | \r |
| 1007 | A floating-point value is converted to an integer. The desired precision of\r |
| 1008 | the result (single or double integer) must be set before calling.\r |
| 1009 | \r |
| 1010 | Values less than 0.5 (i.e., with negative exponents) underflow to zero. If\r |
| 1011 | the value exceeds the specified integer range, the maximum integer value is\r |
| 1012 | returned and overflow is set. Otherwise, the floating-point value is\r |
| 1013 | right-shifted to zero the exponent. The result is then rounded.\r |
| 1014 | */\r |
| 1015 | \r |
| 1016 | static uint32 fix (FPU *result, FPU operand)\r |
| 1017 | {\r |
| 1018 | uint32 overflow;\r |
| 1019 | t_bool bits_lost;\r |
| 1020 | \r |
| 1021 | if (operand.exponent < 0) { /* value < 0.5? */\r |
| 1022 | result->mantissa = 0; /* result rounds to zero */\r |
| 1023 | overflow = 0; /* clear for underflow */\r |
| 1024 | }\r |
| 1025 | \r |
| 1026 | else if (operand.exponent > /* value > integer size? */\r |
| 1027 | (int32) op_bits[result->precision]) {\r |
| 1028 | result->mantissa = /* return max int value */\r |
| 1029 | (t_uint64) int_p_max[result->precision] <<\r |
| 1030 | op_start[result->precision];\r |
| 1031 | overflow = 1; /* and set overflow */\r |
| 1032 | }\r |
| 1033 | \r |
| 1034 | else { /* value in range */\r |
| 1035 | bits_lost = asr (&operand, /* shift to zero exponent */\r |
| 1036 | op_bits[result->precision] - operand.exponent);\r |
| 1037 | \r |
| 1038 | if (operand.mantissa < 0) { /* value negative? */\r |
| 1039 | if (bits_lost) /* bits lost? */\r |
| 1040 | operand.mantissa = operand.mantissa | 1; /* include one for rounding */\r |
| 1041 | \r |
| 1042 | operand.mantissa = operand.mantissa + /* round result */\r |
| 1043 | p_half_lsb[result->precision];\r |
| 1044 | }\r |
| 1045 | \r |
| 1046 | result->mantissa = operand.mantissa & /* mask to precision */\r |
| 1047 | op_mask[result->precision];\r |
| 1048 | overflow = 0;\r |
| 1049 | }\r |
| 1050 | \r |
| 1051 | result->exponent = 0; /* tidy up for integer value */\r |
| 1052 | return overflow;\r |
| 1053 | }\r |
| 1054 | \r |
| 1055 | \r |
| 1056 | /* Float an integer to an unpacked number.\r |
| 1057 | \r |
| 1058 | An integer is converted to a floating-point value. The desired precision of\r |
| 1059 | the result must be set before calling.\r |
| 1060 | \r |
| 1061 | Conversion is simply a matter of copying the integer value, setting an\r |
| 1062 | exponent that reflects the right-aligned position of the bits, and\r |
| 1063 | normalizing.\r |
| 1064 | */\r |
| 1065 | \r |
| 1066 | static void ffloat (FPU *result, FPU operand)\r |
| 1067 | {\r |
| 1068 | result->mantissa = operand.mantissa; /* set value */\r |
| 1069 | result->exponent = op_bits[operand.precision]; /* set exponent */\r |
| 1070 | normalize (result); /* normalize */\r |
| 1071 | return;\r |
| 1072 | }\r |
| 1073 | \r |
| 1074 | \r |
| 1075 | \r |
| 1076 | /* High-level floating-point routines. */\r |
| 1077 | \r |
| 1078 | \r |
| 1079 | /* Determine operand precisions.\r |
| 1080 | \r |
| 1081 | The precisions of the operands and result are determined by decoding an\r |
| 1082 | operation opcode and returned to the caller. Pass NULL for both of the\r |
| 1083 | operands if only the result precision is wanted. Pass NULL for the result if\r |
| 1084 | only the operand precisions are wanted.\r |
| 1085 | */\r |
| 1086 | \r |
| 1087 | void fp_prec (uint16 opcode, OPSIZE *operand_l, OPSIZE *operand_r, OPSIZE *result)\r |
| 1088 | {\r |
| 1089 | OPSIZE fp_size, int_size;\r |
| 1090 | \r |
| 1091 | fp_size = (OPSIZE) ((opcode & 0003) + 2); /* fp_f, fp_x, fp_t, fp_e */\r |
| 1092 | int_size = (OPSIZE) ((opcode & 0004) >> 2); /* in_s, in_d */\r |
| 1093 | \r |
| 1094 | if (operand_l && operand_r) { /* want operand precisions? */\r |
| 1095 | switch (opcode & 0120) { /* mask out opcode bit 5 */\r |
| 1096 | case 0000: /* add/mpy */\r |
| 1097 | case 0020: /* sub/div */\r |
| 1098 | *operand_l = fp_size; /* assume first op is fp */\r |
| 1099 | \r |
| 1100 | if (opcode & 0004) /* operand internal? */\r |
| 1101 | *operand_r = fp_a; /* second op is accum */\r |
| 1102 | else\r |
| 1103 | *operand_r = fp_size; /* second op is fp */\r |
| 1104 | break;\r |
| 1105 | \r |
| 1106 | case 0100: /* fix/accum as integer */\r |
| 1107 | *operand_l = fp_size; /* first op is fp */\r |
| 1108 | *operand_r = fp_a; /* second op is always null */\r |
| 1109 | break;\r |
| 1110 | \r |
| 1111 | case 0120: /* flt/accum as float */\r |
| 1112 | *operand_l = int_size; /* first op is integer */\r |
| 1113 | *operand_r = fp_a; /* second op is always null */\r |
| 1114 | break;\r |
| 1115 | }\r |
| 1116 | \r |
| 1117 | if (opcode & 0010) /* operand internal? */\r |
| 1118 | *operand_l = fp_a; /* first op is accum */\r |
| 1119 | }\r |
| 1120 | \r |
| 1121 | if (result) /* want result precision? */\r |
| 1122 | if ((opcode & 0120) == 0100) /* fix? */\r |
| 1123 | *result = int_size; /* result is integer */\r |
| 1124 | else /* all others */\r |
| 1125 | *result = fp_size; /* result is fp */\r |
| 1126 | \r |
| 1127 | return;\r |
| 1128 | }\r |
| 1129 | \r |
| 1130 | \r |
| 1131 | /* Floating Point Processor executor.\r |
| 1132 | \r |
| 1133 | The executor simulates the MPP interface between the CPU and the FPP. The\r |
| 1134 | operation to be performed is specified by the supplied opcode, which conforms\r |
| 1135 | to the FPP hardware interface, as follows:\r |
| 1136 | \r |
| 1137 | Bits Value Action\r |
| 1138 | ---- ----- ----------------------------------------------\r |
| 1139 | 7 0 Exponent range is standard (+/-127)\r |
| 1140 | 1 Exponent range is expanded (+/-511)\r |
| 1141 | \r |
| 1142 | 6-4 000 Add\r |
| 1143 | 001 Subtract\r |
| 1144 | 010 Multiply\r |
| 1145 | 011 Divide\r |
| 1146 | 100 Fix\r |
| 1147 | 101 Float\r |
| 1148 | 110 (diagnostic)\r |
| 1149 | 111 (diagnostic)\r |
| 1150 | \r |
| 1151 | 3 0 Left operand is supplied\r |
| 1152 | 1 Left operand in accumulator\r |
| 1153 | \r |
| 1154 | 2 0 Right operand is supplied (ADD/SUB/MPY/DIV)\r |
| 1155 | Single integer operation (FIX/FLT)\r |
| 1156 | 1 Right operand in accumulator (ADD/SUB/MPY/DIV)\r |
| 1157 | Double integer operation (FIX/FLT)\r |
| 1158 | \r |
| 1159 | 1-0 00 2-word operation\r |
| 1160 | 01 3-word operation\r |
| 1161 | 10 4-word operation\r |
| 1162 | 11 5-word operation\r |
| 1163 | \r |
| 1164 | If the opcode specifies that the left (or right) operand is in the\r |
| 1165 | accumulator, then the value supplied for that parameter is not used. All\r |
| 1166 | results are automatically left in the accumulator. If the result is not\r |
| 1167 | needed externally, then NULL may be passed for the result parameter.\r |
| 1168 | \r |
| 1169 | To support accumulator set/get operations under simulation, the opcode is\r |
| 1170 | expanded to include a special mode, indicated by bit 15 = 1. In this mode,\r |
| 1171 | if the result parameter is NULL, then the accumulator is set from the value\r |
| 1172 | passed as operand_l. If the result parameter is not null, then the\r |
| 1173 | accumulator value is returned as the result, and operand_l is ignored. The\r |
| 1174 | precision of the operation is performed as specified by the OPSIZE value\r |
| 1175 | passed in bits 2-0 of the opcode.\r |
| 1176 | \r |
| 1177 | The function returns 1 if the operation overflows and 0 if not.\r |
| 1178 | */\r |
| 1179 | \r |
| 1180 | uint32 fp_exec (uint16 opcode, OP *result, OP operand_l, OP operand_r)\r |
| 1181 | {\r |
| 1182 | static FPU accumulator;\r |
| 1183 | FPU uoperand_l, uoperand_r;\r |
| 1184 | OPSIZE op_l_prec, op_r_prec, rslt_prec;\r |
| 1185 | uint32 overflow;\r |
| 1186 | \r |
| 1187 | if (opcode & SIGN) { /* accumulator mode? */\r |
| 1188 | rslt_prec = (OPSIZE) (opcode & 0017); /* get operation precision */\r |
| 1189 | \r |
| 1190 | if (result) { /* get accumulator? */\r |
| 1191 | op_l_prec = accumulator.precision; /* save accum prec temp */\r |
| 1192 | accumulator.precision = rslt_prec; /* set desired precision */\r |
| 1193 | *result = pack (accumulator); /* pack accumulator */\r |
| 1194 | accumulator.precision = op_l_prec; /* restore correct prec */\r |
| 1195 | }\r |
| 1196 | else /* set accumulator */\r |
| 1197 | accumulator = unpack (operand_l, rslt_prec); /* unpack from operand */\r |
| 1198 | \r |
| 1199 | return 0; /* no overflow from accum ops */\r |
| 1200 | }\r |
| 1201 | \r |
| 1202 | fp_prec (opcode, &op_l_prec, &op_r_prec, &rslt_prec); /* calc precs from opcode */\r |
| 1203 | \r |
| 1204 | if (op_l_prec == fp_a) /* left operand in accum? */\r |
| 1205 | uoperand_l = accumulator; /* copy it */\r |
| 1206 | else /* operand supplied */\r |
| 1207 | uoperand_l = unpack (operand_l, op_l_prec); /* unpack from parameter */\r |
| 1208 | \r |
| 1209 | if (op_r_prec == fp_a) /* right operand in accum? */\r |
| 1210 | uoperand_r = accumulator; /* copy it */\r |
| 1211 | else /* operand supplied */\r |
| 1212 | uoperand_r = unpack (operand_r, op_r_prec); /* unpack from parameter */\r |
| 1213 | \r |
| 1214 | \r |
| 1215 | switch (opcode & 0160) { /* dispatch operation */\r |
| 1216 | \r |
| 1217 | case 0000: /* add */\r |
| 1218 | add (&accumulator, uoperand_l, uoperand_r);\r |
| 1219 | break;\r |
| 1220 | \r |
| 1221 | case 0020: /* subtract */\r |
| 1222 | complement (&uoperand_r);\r |
| 1223 | add (&accumulator, uoperand_l, uoperand_r);\r |
| 1224 | break;\r |
| 1225 | \r |
| 1226 | case 0040: /* multiply */\r |
| 1227 | multiply (&accumulator, uoperand_l, uoperand_r);\r |
| 1228 | break;\r |
| 1229 | \r |
| 1230 | case 0060: /* divide */\r |
| 1231 | divide (&accumulator, uoperand_l, uoperand_r);\r |
| 1232 | break;\r |
| 1233 | \r |
| 1234 | case 0100: /* fix */\r |
| 1235 | accumulator.precision = rslt_prec;\r |
| 1236 | overflow = fix (&accumulator, uoperand_l);\r |
| 1237 | \r |
| 1238 | if (result) /* result wanted? */\r |
| 1239 | *result = pack_int (accumulator.mantissa, /* pack integer */\r |
| 1240 | rslt_prec);\r |
| 1241 | return overflow;\r |
| 1242 | \r |
| 1243 | case 0120: /* float */\r |
| 1244 | accumulator.precision = rslt_prec;\r |
| 1245 | ffloat (&accumulator, uoperand_l);\r |
| 1246 | \r |
| 1247 | if (result) /* result wanted? */\r |
| 1248 | *result = pack (accumulator); /* pack FP (FLT does not round) */\r |
| 1249 | return 0;\r |
| 1250 | \r |
| 1251 | case 0140: /* (diagnostic) */\r |
| 1252 | case 0160: /* (diagnostic) */\r |
| 1253 | return 0;\r |
| 1254 | }\r |
| 1255 | \r |
| 1256 | if (UNIT_CPU_MODEL != UNIT_1000_F) /* firmware implementation? */\r |
| 1257 | accumulator.mantissa = accumulator.mantissa & /* mask to precision */\r |
| 1258 | op_mask[accumulator.precision];\r |
| 1259 | \r |
| 1260 | normalize (&accumulator); /* normalize */\r |
| 1261 | overflow = roundovf (&accumulator, opcode & 0200); /* round and check for overflow */\r |
| 1262 | \r |
| 1263 | if (result) /* result wanted? */\r |
| 1264 | *result = pack (accumulator); /* pack result */\r |
| 1265 | \r |
| 1266 | return overflow;\r |
| 1267 | }\r |
| 1268 | \r |
| 1269 | \r |
| 1270 | /* Set or get accumulator at desired precision.\r |
| 1271 | \r |
| 1272 | This function provides access to the FPP accumulator. In hardware, the\r |
| 1273 | accumulator may be read at a given precision by sending the FPP an opcode\r |
| 1274 | encoded with the desired precision and then reading words from the FPP\r |
| 1275 | /without/ initiating the operation, i.e., without starting the processor.\r |
| 1276 | \r |
| 1277 | Under simulation, pass this function a NULL operand and the desired\r |
| 1278 | precision to read the accumulator. Pass a pointer to an operand and the\r |
| 1279 | desired precision to set the accumulator; the return value in this case is\r |
| 1280 | not defined.\r |
| 1281 | */\r |
| 1282 | \r |
| 1283 | OP fp_accum (const OP *operand, OPSIZE precision)\r |
| 1284 | {\r |
| 1285 | OP result = NOP;\r |
| 1286 | uint16 opcode = (uint16) precision | SIGN; /* add special mode bit */\r |
| 1287 | \r |
| 1288 | if (operand)\r |
| 1289 | fp_exec (opcode, NULL, *operand, NOP); /* set accum */\r |
| 1290 | else\r |
| 1291 | fp_exec (opcode, &result, NOP, NOP); /* get accum */\r |
| 1292 | return result;\r |
| 1293 | }\r |
| 1294 | \r |
| 1295 | \r |
| 1296 | /* Pack an unpacked floating-point number.\r |
| 1297 | \r |
| 1298 | An unpacked mantissa is passed as a "packed" number with an unused exponent.\r |
| 1299 | The mantissa and separately-passed exponent are packed into the in-memory\r |
| 1300 | floating-point format. Note that all bits are significant in the mantissa\r |
| 1301 | (no masking is done).\r |
| 1302 | */\r |
| 1303 | \r |
| 1304 | uint32 fp_pack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)\r |
| 1305 | {\r |
| 1306 | FPU unpacked;\r |
| 1307 | \r |
| 1308 | unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */\r |
| 1309 | unpacked.exponent = exponent; /* set exponent */\r |
| 1310 | unpacked.precision = precision; /* set precision */\r |
| 1311 | *result = pack (unpacked); /* pack them */\r |
| 1312 | return 0;\r |
| 1313 | }\r |
| 1314 | \r |
| 1315 | \r |
| 1316 | /* Normalize, round, and pack an unpacked floating-point number.\r |
| 1317 | \r |
| 1318 | An unpacked mantissa is passed as a "packed" number with an unused exponent.\r |
| 1319 | The mantissa and separately-passed exponent are normalized, rounded, and\r |
| 1320 | packed into the in-memory floating-point format. Note that all bits are\r |
| 1321 | significant in the mantissa (no masking is done).\r |
| 1322 | */\r |
| 1323 | \r |
| 1324 | uint32 fp_nrpack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)\r |
| 1325 | {\r |
| 1326 | FPU unpacked;\r |
| 1327 | \r |
| 1328 | unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */\r |
| 1329 | unpacked.exponent = exponent; /* set exponent */\r |
| 1330 | unpacked.precision = precision; /* set precision */\r |
| 1331 | return nrpack (result, unpacked, FALSE); /* norm/rnd/pack them */\r |
| 1332 | }\r |
| 1333 | \r |
| 1334 | \r |
| 1335 | /* Unpack a packed floating-point number.\r |
| 1336 | \r |
| 1337 | A floating-point number, packed into the in-memory format, is unpacked into\r |
| 1338 | separate mantissa and exponent values. The unpacked mantissa is returned in\r |
| 1339 | a "packed" structure with an exponent of zero. Mantissa or exponent may be\r |
| 1340 | null if that part isn't wanted.\r |
| 1341 | */\r |
| 1342 | \r |
| 1343 | uint32 fp_unpack (OP *mantissa, int32 *exponent, OP packed, OPSIZE precision)\r |
| 1344 | \r |
| 1345 | {\r |
| 1346 | FPU unpacked;\r |
| 1347 | \r |
| 1348 | unpacked = unpack (packed, precision); /* unpack mantissa and exponent */\r |
| 1349 | \r |
| 1350 | if (exponent) /* exponent wanted? */\r |
| 1351 | *exponent = unpacked.exponent; /* return exponent */\r |
| 1352 | \r |
| 1353 | if (mantissa) /* mantissa wanted? */\r |
| 1354 | *mantissa = pack_int (unpacked.mantissa, fp_t); /* return full-size mantissa */\r |
| 1355 | return 0;\r |
| 1356 | }\r |
| 1357 | \r |
| 1358 | \r |
| 1359 | /* Complement an unpacked mantissa.\r |
| 1360 | \r |
| 1361 | An unpacked mantissa is passed as a "packed" number with a zero exponent.\r |
| 1362 | The exponent increment, i.e., either zero or one, depending on whether a\r |
| 1363 | renormalization was required, is returned. Note that all bits are\r |
| 1364 | significant in the mantissa.\r |
| 1365 | */\r |
| 1366 | \r |
| 1367 | uint32 fp_ucom (OP *mantissa, OPSIZE precision)\r |
| 1368 | {\r |
| 1369 | FPU unpacked;\r |
| 1370 | \r |
| 1371 | unpacked.mantissa = unpack_int (*mantissa, precision); /* unpack mantissa */\r |
| 1372 | unpacked.exponent = 0; /* clear undefined exponent */\r |
| 1373 | unpacked.precision = precision; /* set precision */\r |
| 1374 | complement (&unpacked); /* negate it */\r |
| 1375 | *mantissa = pack_int (unpacked.mantissa, precision); /* replace mantissa */\r |
| 1376 | return (uint32) unpacked.exponent; /* return exponent increment */\r |
| 1377 | }\r |
| 1378 | \r |
| 1379 | \r |
| 1380 | /* Complement a floating-point number. */\r |
| 1381 | \r |
| 1382 | uint32 fp_pcom (OP *packed, OPSIZE precision)\r |
| 1383 | {\r |
| 1384 | FPU unpacked;\r |
| 1385 | \r |
| 1386 | unpacked = unpack (*packed, precision); /* unpack the number */\r |
| 1387 | complement (&unpacked); /* negate it */\r |
| 1388 | return nrpack (packed, unpacked, FALSE); /* and norm/rnd/pack */\r |
| 1389 | }\r |
| 1390 | \r |
| 1391 | \r |
| 1392 | /* Truncate a floating-point number. */\r |
| 1393 | \r |
| 1394 | uint32 fp_trun (OP *result, OP source, OPSIZE precision)\r |
| 1395 | {\r |
| 1396 | t_bool bits_lost;\r |
| 1397 | FPU unpacked;\r |
| 1398 | FPU one = { FP_ONEHALF, 1, 0 }; /* 0.5 * 2 ** 1 = 1.0 */\r |
| 1399 | OP zero = { { 0, 0, 0, 0, 0 } }; /* 0.0 */\r |
| 1400 | t_uint64 mask = mant_mask[precision] & ~FP_MSIGN;\r |
| 1401 | \r |
| 1402 | unpacked = unpack (source, precision);\r |
| 1403 | if (unpacked.exponent < 0) /* number < 0.5? */\r |
| 1404 | *result = zero; /* return 0 */\r |
| 1405 | else if (unpacked.exponent >= (int32) op_bits[precision]) /* no fractional bits? */\r |
| 1406 | *result = source; /* already integer */\r |
| 1407 | else {\r |
| 1408 | mask = (mask >> unpacked.exponent) & mask; /* mask fractional bits */\r |
| 1409 | bits_lost = ((unpacked.mantissa & mask) != 0); /* flag if bits lost */\r |
| 1410 | unpacked.mantissa = unpacked.mantissa & ~mask; /* mask off fraction */\r |
| 1411 | if ((unpacked.mantissa < 0) && bits_lost) /* negative? */\r |
| 1412 | add (&unpacked, unpacked, one); /* truncate toward zero */\r |
| 1413 | nrpack (result, unpacked, FALSE); /* (overflow cannot occur) */\r |
| 1414 | }\r |
| 1415 | return 0; /* clear overflow on return */\r |
| 1416 | }\r |
| 1417 | \r |
| 1418 | \r |
| 1419 | /* Convert a floating-point number from one precision to another. */\r |
| 1420 | \r |
| 1421 | uint32 fp_cvt (OP *result, OPSIZE source_precision, OPSIZE dest_precision)\r |
| 1422 | {\r |
| 1423 | FPU unpacked;\r |
| 1424 | \r |
| 1425 | unpacked = unpack (*result, source_precision);\r |
| 1426 | unpacked.precision = dest_precision;\r |
| 1427 | return nrpack (result, unpacked, FALSE); /* norm/rnd/pack */\r |
| 1428 | }\r |
| 1429 | \r |
| 1430 | \r |
| 1431 | #endif /* end of int64 support */\r |