Commit | Line | Data |
---|---|---|
196ba1fc PH |
1 | /* hp2100_fp1.c: HP 1000 multiple-precision floating point routines\r |
2 | \r | |
3 | Copyright (c) 2005-2008, J. David Bryan\r | |
4 | \r | |
5 | Permission is hereby granted, free of charge, to any person obtaining a\r | |
6 | copy of this software and associated documentation files (the "Software"),\r | |
7 | to deal in the Software without restriction, including without limitation\r | |
8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,\r | |
9 | and/or sell copies of the Software, and to permit persons to whom the\r | |
10 | Software is furnished to do so, subject to the following conditions:\r | |
11 | \r | |
12 | The above copyright notice and this permission notice shall be included in\r | |
13 | all copies or substantial portions of the Software.\r | |
14 | \r | |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r | |
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r | |
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\r | |
18 | THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\r | |
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r | |
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r | |
21 | \r | |
22 | Except as contained in this notice, the name of the author shall not be used\r | |
23 | in advertising or otherwise to promote the sale, use or other dealings in\r | |
24 | this Software without prior written authorization from the author.\r | |
25 | \r | |
26 | 10-May-08 JDB Fixed uninitialized return in fp_accum when setting\r | |
27 | 19-Mar-08 JDB Reworked "complement" to avoid inlining bug in gcc-4.x\r | |
28 | 01-Dec-06 JDB Reworked into generalized multiple-precision ops for FPP\r | |
29 | 12-Oct-06 JDB Altered x_trun for F-Series FFP compatibility\r | |
30 | Added F-Series ..TCM FFP helpers\r | |
31 | \r | |
32 | Primary references:\r | |
33 | - HP 1000 M/E/F-Series Computers Engineering and Reference Documentation\r | |
34 | (92851-90001, Mar-1981)\r | |
35 | - HP 1000 M/E/F-Series Computers Technical Reference Handbook\r | |
36 | (5955-0282, Mar-1980)\r | |
37 | - DOS/RTE Relocatable Library Reference Manual\r | |
38 | (24998-90001, Oct-1981)\r | |
39 | \r | |
40 | \r | |
41 | This module implements multiple-precision floating-point operations to\r | |
42 | support the 1000 F-Series hardware Floating Point Processor. It employs\r | |
43 | 64-bit integer arithmetic for speed and simplicity of implementation. The\r | |
44 | host compiler must support 64-bit integers, and the HAVE_INT64 symbol must be\r | |
45 | defined during compilation. If this symbol is not defined, then FPP support\r | |
46 | is not available.\r | |
47 | \r | |
48 | HP 2100/1000 computers used a proprietary floating-point format. The 2100\r | |
49 | had optional firmware that provided two-word floating-point add, subtract,\r | |
50 | multiply, and divide, as well as single-integer fix and float. The 1000-M/E\r | |
51 | provided the same two-word firmware operations as standard equipment.\r | |
52 | Three-word extended-precision instructions for the 2100 and 1000-M/E were\r | |
53 | provided by the optional Fast FORTRAN Processor firmware.\r | |
54 | \r | |
55 | The 1000-F substituted a hardware floating point processor for the firmware\r | |
56 | in previous machines. In addition to the two- and three-word formats, the\r | |
57 | F-Series introduced a four-word double-precision format. A five-word format\r | |
58 | that provided extra range in the exponent by unpacking it from the mantissa\r | |
59 | was also provided, although this capability was not documented in the user\r | |
60 | manual. In addition, the FPP improved the accuracy of floating-point\r | |
61 | calculations, as the firmware versions sacrificed a few bits of precision to\r | |
62 | gain speed. Consequently, operations on the F-Series may return results that\r | |
63 | differ slightly from the same operations on the M/E-Series or the 2100.\r | |
64 | \r | |
65 | F-Series units after date code 1920 also provided two-word double-integer\r | |
66 | instructions in firmware, as well as double-integer fix and float operations.\r | |
67 | \r | |
68 | The original 32-bit floating-point format is as follows:\r | |
69 | \r | |
70 | 15 14 0\r | |
71 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
72 | |MS| mantissa high | : M\r | |
73 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
74 | | mantissa low | exponent |XS| : M + 1\r | |
75 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
76 | 15 8 7 1 0\r | |
77 | \r | |
78 | Both 23-bit mantissa and 7-bit exponent are in twos-complement form. The\r | |
79 | exponent sign bit has been rotated into the LSB of the second word.\r | |
80 | \r | |
81 | The extended-precision floating-point format is a 48-bit extension of the\r | |
82 | 32-bit format used for single precision. A packed extended-precision value\r | |
83 | consists of a 39-bit mantissa and a 7-bit exponent. The format is as\r | |
84 | follows:\r | |
85 | \r | |
86 | 15 14 0\r | |
87 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
88 | |MS| mantissa high | : M\r | |
89 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
90 | | mantissa middle | : M + 1\r | |
91 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
92 | | mantissa low | exponent |XS| : M + 2\r | |
93 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
94 | 15 8 7 1 0\r | |
95 | \r | |
96 | The double-precision floating-point format is similar to the 48-bit\r | |
97 | extended-precision format, although with a 55-bit mantissa:\r | |
98 | \r | |
99 | 15 14 0\r | |
100 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
101 | |MS| mantissa high | : M\r | |
102 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
103 | | mantissa middle high | : M + 1\r | |
104 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
105 | | mantissa middle low | : M + 2\r | |
106 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
107 | | mantissa low | exponent |XS| : M + 3\r | |
108 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
109 | 15 8 7 1 0\r | |
110 | \r | |
111 | The FPP also supports a special five-word expanded-exponent format:\r | |
112 | \r | |
113 | 15 14 0\r | |
114 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
115 | |MS| mantissa high | : M\r | |
116 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
117 | | mantissa middle high | : M + 1\r | |
118 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
119 | | mantissa middle low | : M + 2\r | |
120 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
121 | | mantissa low | : M + 3\r | |
122 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
123 | | exponent |XS| : M + 4\r | |
124 | +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+\r | |
125 | 15 8 7 1 0\r | |
126 | \r | |
127 | The exponent is a full 16-bit twos-complement value, but the allowed range is\r | |
128 | only 10 bits, i.e., -512 to +511.\r | |
129 | \r | |
130 | In a normalized value, the sign and MSB of the mantissa differ. Zero is\r | |
131 | represented by all words = 0.\r | |
132 | \r | |
133 | Internally, unpacked floating-point values are contained in a structure\r | |
134 | having a signed 64-bit mantissa and a signed 32-bit exponent. Mantissas are\r | |
135 | left-justified with the unused bits masked to zero. Exponents are\r | |
136 | right-justified. The precision is indicated by the value of a structure\r | |
137 | field.\r | |
138 | \r | |
139 | HP terminology for the three-word floating-point format is confused. Some\r | |
140 | documents refer to it as "double precision," while others use "extended\r | |
141 | precision." The instruction mnemonics begin with "X" (e.g., .XADD),\r | |
142 | suggesting the extended-precision term.\r | |
143 | \r | |
144 | HP apparently intended that the four-word double-precision format would be\r | |
145 | called "triple-precision," as the instruction mnemonics begin with "T" (e.g.,\r | |
146 | ".TADD" for the four-word add instruction). The source files for the\r | |
147 | software simulations of these instructions for the M/E-Series also explicitly\r | |
148 | refer to "triple precision math." However, the engineering documentation and\r | |
149 | the F-Series reference manual both use the double-precision term.\r | |
150 | \r | |
151 | This module adopts the single/extended/double terminology and uses the\r | |
152 | initial letters of the instructions (F/X/T) to indicate the precision used.\r | |
153 | \r | |
154 | The FPP hardware consisted of two circuit boards that interfaced to the main\r | |
155 | CPU via the Microprogammable Processor Port (MPP) that had been introduced\r | |
156 | with the 1000 E-Series. One board contained argument registers and ALUs,\r | |
157 | split into separate mantissa and exponent parts. The other contained a state\r | |
158 | machine sequencer. FPP results were copied automatically to the argument\r | |
159 | registers in addition to being available over the MPP, so that chained\r | |
160 | operations could be executed from these "accumulators" without reloading.\r | |
161 | \r | |
162 | The FPP operated independently of the CPU. An opcode, specifying one of the\r | |
163 | six operations (add, subtract, multiply, divide, fix, or float) was sent to\r | |
164 | the FPP, and a start command was given. Operands of appropriate precision\r | |
165 | were then supplied to the FPP. Once the operands were received, the FPP\r | |
166 | would execute and set a flag when the operation was complete. The result\r | |
167 | would then be retrieved from the FPP. The floating-point instruction\r | |
168 | firmware in the CPU initiated the desired FPP operation and handled operand\r | |
169 | reads from and result writes to main memory.\r | |
170 | \r | |
171 | Under simulation, "fp_exec" provides the six arithmetic operations analogous\r | |
172 | to FPP execution. The remainder of the functions are helpers that were\r | |
173 | provided by firmware in the 1000-F but that can reuse code needed to simulate\r | |
174 | the FPP hardware. As with the hardware, "fp_exec" retains the last result\r | |
175 | in an internal accumulator that may be referenced in subsequent operations.\r | |
176 | \r | |
177 | NOTE: this module also provides the floating-point support for the firmware\r | |
178 | single-precision 1000-M/E base set and extended-precision FFP instructions.\r | |
179 | Because the firmware and hardware implementations returned slightly different\r | |
180 | results, particularly with respect to round-off, conditional checks are\r | |
181 | implemented in the arithmetic routines. In some cases, entirely different\r | |
182 | algorithms are used to ensure fidelity with the real machines. Functionally,\r | |
183 | this means that the 2100/1000-M/E and 1000-F floating-point diagnostics are\r | |
184 | not interchangeable, and failures are to be expected if a diagnostic is run\r | |
185 | on the wrong machine.\r | |
186 | */\r | |
187 | \r | |
188 | #include "hp2100_defs.h"\r | |
189 | #include "hp2100_cpu.h"\r | |
190 | #include "hp2100_cpu1.h"\r | |
191 | #include "hp2100_fp1.h"\r | |
192 | \r | |
193 | \r | |
194 | #if defined (HAVE_INT64) /* we need int64 support */\r | |
195 | \r | |
196 | /* Field widths. */\r | |
197 | \r | |
198 | #define IN_W_SIGN 1\r | |
199 | #define IN_W_SMAGN 15\r | |
200 | #define IN_W_DMAGN 31\r | |
201 | \r | |
202 | #define FP_W_MSIGN 1\r | |
203 | #define FP_W_FMANT 23\r | |
204 | #define FP_W_XMANT 39\r | |
205 | #define FP_W_TMANT 55\r | |
206 | #define FP_W_EMANT 55\r | |
207 | #define FP_W_EXPANDEXP 9\r | |
208 | #define FP_W_EXP 7\r | |
209 | #define FP_W_ESIGN 1\r | |
210 | \r | |
211 | /* Starting bit numbers. */\r | |
212 | \r | |
213 | #define IN_V_SIGN (64 - IN_W_SIGN)\r | |
214 | #define IN_V_SNUM (64 - IN_W_SIGN - IN_W_SMAGN)\r | |
215 | #define IN_V_DNUM (64 - IN_W_SIGN - IN_W_DMAGN)\r | |
216 | \r | |
217 | #define FP_V_FNUM (64 - FP_W_MSIGN - FP_W_FMANT - FP_W_EXP - FP_W_ESIGN)\r | |
218 | #define FP_V_XNUM (64 - FP_W_MSIGN - FP_W_XMANT - FP_W_EXP - FP_W_ESIGN)\r | |
219 | #define FP_V_TNUM (64 - FP_W_MSIGN - FP_W_TMANT - FP_W_EXP - FP_W_ESIGN)\r | |
220 | #define FP_V_ENUM (64 - FP_W_MSIGN - FP_W_EMANT - FP_W_EXP - FP_W_ESIGN)\r | |
221 | \r | |
222 | #define FP_V_MSIGN (64 - FP_W_MSIGN)\r | |
223 | #define FP_V_FMANT (64 - FP_W_MSIGN - FP_W_FMANT)\r | |
224 | #define FP_V_XMANT (64 - FP_W_MSIGN - FP_W_XMANT)\r | |
225 | #define FP_V_TMANT (64 - FP_W_MSIGN - FP_W_TMANT)\r | |
226 | #define FP_V_EMANT (64 - FP_W_MSIGN - FP_W_EMANT)\r | |
227 | #define FP_V_EXP 1\r | |
228 | #define FP_V_ESIGN 0\r | |
229 | \r | |
230 | /* Right-aligned field masks. */\r | |
231 | \r | |
232 | #define IN_M_SIGN (((t_uint64) 1 << IN_W_SIGN) - 1)\r | |
233 | #define IN_M_SMAGN (((t_uint64) 1 << IN_W_SMAGN) - 1)\r | |
234 | #define IN_M_DMAGN (((t_uint64) 1 << IN_W_DMAGN) - 1)\r | |
235 | \r | |
236 | #define FP_M_MSIGN (((t_uint64) 1 << FP_W_MSIGN) - 1)\r | |
237 | #define FP_M_FMANT (((t_uint64) 1 << FP_W_FMANT) - 1)\r | |
238 | #define FP_M_XMANT (((t_uint64) 1 << FP_W_XMANT) - 1)\r | |
239 | #define FP_M_TMANT (((t_uint64) 1 << FP_W_TMANT) - 1)\r | |
240 | #define FP_M_EMANT (((t_uint64) 1 << FP_W_EMANT) - 1)\r | |
241 | \r | |
242 | #define FP_M_EXPANDEXP ((1 << FP_W_EXPANDEXP) - 1)\r | |
243 | #define FP_M_EXP ((1 << FP_W_EXP) - 1)\r | |
244 | #define FP_M_ESIGN ((1 << FP_W_ESIGN) - 1)\r | |
245 | \r | |
246 | /* In-place field masks. */\r | |
247 | \r | |
248 | #define IN_SIGN (IN_M_SIGN << IN_V_SIGN)\r | |
249 | #define IN_SMAGN (IN_M_SMAGN << IN_V_SNUM)\r | |
250 | #define IN_DMAGN (IN_M_DMAGN << IN_V_DNUM)\r | |
251 | \r | |
252 | #define FP_MSIGN (FP_M_MSIGN << FP_V_MSIGN)\r | |
253 | #define FP_FMANT (FP_M_FMANT << FP_V_FMANT)\r | |
254 | #define FP_XMANT (FP_M_XMANT << FP_V_XMANT)\r | |
255 | #define FP_TMANT (FP_M_TMANT << FP_V_TMANT)\r | |
256 | #define FP_EMANT (FP_M_EMANT << FP_V_EMANT)\r | |
257 | #define FP_EXP (FP_M_EXP << FP_V_EXP)\r | |
258 | #define FP_ESIGN (FP_M_ESIGN << FP_V_ESIGN)\r | |
259 | \r | |
260 | /* In-place record masks. */\r | |
261 | \r | |
262 | #define IN_SSMAGN (IN_SIGN | IN_SMAGN)\r | |
263 | #define IN_SDMAGN (IN_SIGN | IN_DMAGN)\r | |
264 | \r | |
265 | #define FP_SFMANT (FP_MSIGN | FP_FMANT)\r | |
266 | #define FP_SXMANT (FP_MSIGN | FP_XMANT)\r | |
267 | #define FP_STMANT (FP_MSIGN | FP_TMANT)\r | |
268 | #define FP_SEMANT (FP_MSIGN | FP_EMANT)\r | |
269 | #define FP_SEXP (FP_ESIGN | FP_EXP)\r | |
270 | \r | |
271 | /* Minima and maxima. */\r | |
272 | \r | |
273 | #define FP_ONEHALF ((t_int64) 1 << (FP_V_MSIGN - 1)) /* mantissa = 0.5 */\r | |
274 | #define FP_MAXPMANT ((t_int64) FP_EMANT) /* maximum pos mantissa */\r | |
275 | #define FP_MAXNMANT ((t_int64) FP_MSIGN) /* maximum neg mantissa */\r | |
276 | #define FP_MAXPEXP (FP_M_EXPANDEXP) /* maximum pos expanded exponent */\r | |
277 | #define FP_MAXNEXP (-(FP_MAXPEXP + 1)) /* maximum neg expanded exponent */\r | |
278 | \r | |
279 | /* Floating-point helpers. */\r | |
280 | \r | |
281 | #define DENORM(x) ((((x) ^ (x) << 1) & FP_MSIGN) == 0)\r | |
282 | \r | |
283 | #define TO_EXP(e) (int8) ((e >> FP_V_EXP & FP_M_EXP) | \\r | |
284 | (e & FP_M_ESIGN ? ~FP_M_EXP : 0))\r | |
285 | \r | |
286 | /* Property constants. */\r | |
287 | \r | |
288 | static const t_int64 p_half_lsb[6] = { ((t_int64) 1 << IN_V_SNUM) - 1, /* different than FP! */\r | |
289 | ((t_int64) 1 << IN_V_DNUM) - 1, /* different than FP! */\r | |
290 | (t_int64) 1 << (FP_V_FMANT - 1),\r | |
291 | (t_int64) 1 << (FP_V_XMANT - 1),\r | |
292 | (t_int64) 1 << (FP_V_TMANT - 1),\r | |
293 | (t_int64) 1 << (FP_V_EMANT - 1) };\r | |
294 | \r | |
295 | static const t_int64 n_half_lsb[6] = { 0,\r | |
296 | 0,\r | |
297 | ((t_int64) 1 << (FP_V_FMANT - 1)) - 1,\r | |
298 | ((t_int64) 1 << (FP_V_XMANT - 1)) - 1,\r | |
299 | ((t_int64) 1 << (FP_V_TMANT - 1)) - 1,\r | |
300 | ((t_int64) 1 << (FP_V_EMANT - 1)) - 1 };\r | |
301 | \r | |
302 | static const uint32 op_start[6] = { IN_V_SNUM,\r | |
303 | IN_V_DNUM,\r | |
304 | FP_V_FMANT,\r | |
305 | FP_V_XMANT,\r | |
306 | FP_V_TMANT,\r | |
307 | FP_V_EMANT };\r | |
308 | \r | |
309 | static const t_int64 mant_mask[6] = { IN_SSMAGN,\r | |
310 | IN_SDMAGN,\r | |
311 | FP_SFMANT,\r | |
312 | FP_SXMANT,\r | |
313 | FP_STMANT,\r | |
314 | FP_SEMANT };\r | |
315 | \r | |
316 | static const uint32 op_bits[6] = { IN_W_SMAGN,\r | |
317 | IN_W_DMAGN,\r | |
318 | FP_W_FMANT + FP_W_MSIGN,\r | |
319 | FP_W_XMANT + FP_W_MSIGN,\r | |
320 | FP_W_TMANT + FP_W_MSIGN,\r | |
321 | FP_W_EMANT + FP_W_MSIGN };\r | |
322 | \r | |
323 | static const t_int64 op_mask[6] = { ~(t_int64) 0 << IN_V_SNUM,\r | |
324 | ~(t_int64) 0 << IN_V_DNUM,\r | |
325 | ~(t_int64) 0 << FP_V_FNUM,\r | |
326 | ~(t_int64) 0 << FP_V_XNUM,\r | |
327 | ~(t_int64) 0 << FP_V_TNUM,\r | |
328 | ~(t_int64) 0 << FP_V_ENUM };\r | |
329 | \r | |
330 | static const uint32 int_p_max[2] = { IN_M_SMAGN,\r | |
331 | IN_M_DMAGN };\r | |
332 | \r | |
333 | \r | |
334 | /* Internal unpacked floating-point representation. */\r | |
335 | \r | |
336 | typedef struct {\r | |
337 | t_int64 mantissa;\r | |
338 | int32 exponent;\r | |
339 | OPSIZE precision;\r | |
340 | } FPU;\r | |
341 | \r | |
342 | \r | |
343 | \r | |
344 | /* Low-level helper routines. */\r | |
345 | \r | |
346 | \r | |
347 | /* Arithmetic shift right for mantissa only.\r | |
348 | \r | |
349 | Returns TRUE if any one-bits are shifted out (for F-series only).\r | |
350 | */\r | |
351 | \r | |
352 | static t_bool asr (FPU *operand, int32 shift)\r | |
353 | {\r | |
354 | t_uint64 mask;\r | |
355 | t_bool bits_lost;\r | |
356 | \r | |
357 | if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */\r | |
358 | mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */\r | |
359 | bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */\r | |
360 | }\r | |
361 | else\r | |
362 | bits_lost = FALSE;\r | |
363 | \r | |
364 | operand->mantissa = operand->mantissa >> shift; /* mantissa is int, so ASR */\r | |
365 | return bits_lost;\r | |
366 | }\r | |
367 | \r | |
368 | \r | |
369 | /* Logical shift right for mantissa and exponent.\r | |
370 | \r | |
371 | Shifts mantissa and corrects exponent for mantissa overflow.\r | |
372 | Returns TRUE if any one-bits are shifted out (for F-series only).\r | |
373 | */\r | |
374 | \r | |
375 | static t_bool lsrx (FPU *operand, int32 shift)\r | |
376 | {\r | |
377 | t_uint64 mask;\r | |
378 | t_bool bits_lost;\r | |
379 | \r | |
380 | if (UNIT_CPU_MODEL == UNIT_1000_F) { /* F-series? */\r | |
381 | mask = ((t_uint64) 1 << shift) - 1; /* mask for lost bits */\r | |
382 | bits_lost = ((operand->mantissa & mask) != 0); /* flag if any lost */\r | |
383 | }\r | |
384 | else\r | |
385 | bits_lost = FALSE;\r | |
386 | \r | |
387 | operand->mantissa = (t_uint64) operand->mantissa >> shift; /* uint, so LSR */\r | |
388 | operand->exponent = operand->exponent + shift; /* correct exponent */\r | |
389 | return bits_lost;\r | |
390 | }\r | |
391 | \r | |
392 | \r | |
393 | /* Unpack an operand into a long integer.\r | |
394 | \r | |
395 | Returns a left-aligned integer or mantissa. Does not mask to precision; this\r | |
396 | should be done subsequently if desired.\r | |
397 | */\r | |
398 | \r | |
399 | static t_int64 unpack_int (OP packed, OPSIZE precision)\r | |
400 | {\r | |
401 | uint32 i;\r | |
402 | t_uint64 unpacked = 0;\r | |
403 | \r | |
404 | if (precision == in_s)\r | |
405 | unpacked = (t_uint64) packed.word << 48; /* unpack single integer */\r | |
406 | \r | |
407 | else if (precision == in_d)\r | |
408 | unpacked = (t_uint64) packed.dword << 32; /* unpack double integer */\r | |
409 | \r | |
410 | else {\r | |
411 | if (precision == fp_e) /* five word operand? */\r | |
412 | precision = fp_t; /* only four mantissa words */\r | |
413 | \r | |
414 | for (i = 0; i < 4; i++) /* unpack fp 2 to 4 words */\r | |
415 | if (i < TO_COUNT (precision))\r | |
416 | unpacked = unpacked << 16 | packed.fpk[i];\r | |
417 | else\r | |
418 | unpacked = unpacked << 16;\r | |
419 | }\r | |
420 | \r | |
421 | return (t_int64) unpacked;\r | |
422 | }\r | |
423 | \r | |
424 | \r | |
425 | /* Unpack a packed operand.\r | |
426 | \r | |
427 | The packed value is split into separate mantissa and exponent variables. The\r | |
428 | multiple words of the mantissa are concatenated into a single 64-bit signed\r | |
429 | value, and the exponent is shifted with recovery of the sign.\r | |
430 | */\r | |
431 | \r | |
432 | static FPU unpack (OP packed, OPSIZE precision)\r | |
433 | {\r | |
434 | FPU unpacked;\r | |
435 | \r | |
436 | unpacked.precision = precision; /* set value's precision */\r | |
437 | \r | |
438 | unpacked.mantissa = /* unpack and mask mantissa */\r | |
439 | unpack_int (packed, precision) & mant_mask[precision];\r | |
440 | \r | |
441 | switch (precision) {\r | |
442 | \r | |
443 | case fp_f:\r | |
444 | case fp_x:\r | |
445 | case fp_t:\r | |
446 | unpacked.exponent = /* unpack exponent from correct word */\r | |
447 | TO_EXP (packed.fpk[(uint32) precision - 1]);\r | |
448 | break;\r | |
449 | \r | |
450 | case fp_e:\r | |
451 | unpacked.exponent = /* unpack expanded exponent */\r | |
452 | (int16) (packed.fpk[4] >> FP_V_EXP | /* rotate sign into place */\r | |
453 | (packed.fpk[4] & 1 ? SIGN : 0));\r | |
454 | break;\r | |
455 | \r | |
456 | case fp_a: /* no action for value in accum */\r | |
457 | case in_s: /* integers don't use exponent */\r | |
458 | case in_d: /* integers don't use exponent */\r | |
459 | default:\r | |
460 | unpacked.exponent = 0;\r | |
461 | break;\r | |
462 | }\r | |
463 | \r | |
464 | return unpacked;\r | |
465 | }\r | |
466 | \r | |
467 | \r | |
468 | /* Pack a long integer into an operand. */\r | |
469 | \r | |
470 | static OP pack_int (t_int64 unpacked, OPSIZE precision)\r | |
471 | {\r | |
472 | int32 i;\r | |
473 | OP packed;\r | |
474 | \r | |
475 | if (precision == in_s)\r | |
476 | packed.word = (uint16) (unpacked >> 48) & DMASK; /* pack single integer */\r | |
477 | \r | |
478 | else if (precision == in_d)\r | |
479 | packed.dword = (uint32) (unpacked >> 32) & DMASK32; /* pack double integer */\r | |
480 | \r | |
481 | else {\r | |
482 | if (precision == fp_e) /* five word operand? */\r | |
483 | precision = fp_t; /* only four mantissa words */\r | |
484 | \r | |
485 | for (i = 3; i >= 0; i--) { /* pack fp 2 to 4 words */\r | |
486 | packed.fpk[i] = (uint16) unpacked & DMASK;\r | |
487 | unpacked = unpacked >> 16;\r | |
488 | }\r | |
489 | }\r | |
490 | \r | |
491 | return packed;\r | |
492 | }\r | |
493 | \r | |
494 | \r | |
495 | /* Pack an unpacked floating-point number.\r | |
496 | \r | |
497 | The 64-bit mantissa is split into the appropriate number of 16-bit words.\r | |
498 | The exponent is rotated to incorporate the sign bit and merged into the\r | |
499 | appropriate word.\r | |
500 | */\r | |
501 | \r | |
502 | static OP pack (FPU unpacked)\r | |
503 | {\r | |
504 | OP packed;\r | |
505 | uint8 exp;\r | |
506 | \r | |
507 | packed = pack_int (unpacked.mantissa, unpacked.precision); /* pack mantissa */\r | |
508 | \r | |
509 | exp = ((uint8) unpacked.exponent << FP_V_EXP) | /* rotate exponent */\r | |
510 | ((unpacked.exponent < 0) << FP_V_ESIGN);\r | |
511 | \r | |
512 | switch (unpacked.precision) { /* merge exponent into correct word */\r | |
513 | \r | |
514 | case in_s: /* no action for integers */\r | |
515 | case in_d:\r | |
516 | break;\r | |
517 | \r | |
518 | case fp_f: /* merge into last word */\r | |
519 | case fp_x:\r | |
520 | case fp_t:\r | |
521 | packed.fpk[(uint32) unpacked.precision - 1] =\r | |
522 | (packed.fpk[(uint32) unpacked.precision - 1] & ~FP_SEXP) | exp;\r | |
523 | break;\r | |
524 | \r | |
525 | case fp_e: /* place in separate word */\r | |
526 | packed.fpk[4] = ((uint16) unpacked.exponent << FP_V_EXP) |\r | |
527 | ((unpacked.exponent < 0) << FP_V_ESIGN);\r | |
528 | break;\r | |
529 | \r | |
530 | case fp_a: /* no action for value in accum */\r | |
531 | break;\r | |
532 | }\r | |
533 | \r | |
534 | return packed;\r | |
535 | }\r | |
536 | \r | |
537 | \r | |
538 | /* Normalize an unpacked floating-point number.\r | |
539 | \r | |
540 | Floating-point numbers are in normal form if the sign bit and the MSB of the\r | |
541 | mantissa differ. Unnormalized numbers are shifted as needed with appropriate\r | |
542 | exponent modification.\r | |
543 | */\r | |
544 | \r | |
545 | static void normalize (FPU *unpacked)\r | |
546 | {\r | |
547 | \r | |
548 | if (unpacked->mantissa) /* non-zero? */\r | |
549 | while (DENORM (unpacked->mantissa)) { /* normal form? */\r | |
550 | unpacked->exponent = unpacked->exponent - 1; /* no, so left shift */\r | |
551 | unpacked->mantissa = unpacked->mantissa << 1;\r | |
552 | }\r | |
553 | else\r | |
554 | unpacked->exponent = 0; /* clean for zero */\r | |
555 | return;\r | |
556 | }\r | |
557 | \r | |
558 | \r | |
559 | /* Round an unpacked floating-point number and check for overflow.\r | |
560 | \r | |
561 | An unpacked floating-point number is rounded by adding one-half of the LSB\r | |
562 | value, maintaining symmetry around zero. If rounding resulted in a mantissa\r | |
563 | overflow, the result logically is shifted to the right with an appropriate\r | |
564 | exponent modification. Finally, the result is checked for exponent underflow\r | |
565 | or overflow, and the appropriate approximation (zero or infinity) is\r | |
566 | returned.\r | |
567 | \r | |
568 | Rounding in hardware involves a special mantissa extension register that\r | |
569 | holds three "guard" bits and one "sticky" bit. These represent the value of\r | |
570 | bits right-shifted out the mantissa register. Under simulation, we track\r | |
571 | such right-shifts and utilize the lower eight bits of the 64-bit mantissa\r | |
572 | value to simulate the extension register.\r | |
573 | \r | |
574 | Overflow depends on whether the FPP expanded-exponent form is being used\r | |
575 | (this expands the exponent range by two bits). If overflow is detected, the\r | |
576 | value representing infinity is dependent on whether the operation is on\r | |
577 | behalf of the Fast FORTRAN Processor. The F-Series FPP returns positive\r | |
578 | infinity on both positive and negative overflow for all precisions. The 2100\r | |
579 | and M/E-Series FFPs return negative infinity on negative overflow of\r | |
580 | extended-precision values. Single-precision overflows on these machines\r | |
581 | always return positive infinity.\r | |
582 | \r | |
583 | The number to be rounded must be normalized upon entry.\r | |
584 | */\r | |
585 | \r | |
586 | static uint32 roundovf (FPU *unpacked, t_bool expand)\r | |
587 | {\r | |
588 | uint32 overflow;\r | |
589 | t_bool sign;\r | |
590 | \r | |
591 | sign = (unpacked->mantissa < 0); /* save mantissa sign */\r | |
592 | \r | |
593 | if (sign) /* round and mask the number */\r | |
594 | unpacked->mantissa =\r | |
595 | (unpacked->mantissa + n_half_lsb[unpacked->precision]) &\r | |
596 | mant_mask[unpacked->precision];\r | |
597 | else\r | |
598 | unpacked->mantissa =\r | |
599 | (unpacked->mantissa + p_half_lsb[unpacked->precision]) &\r | |
600 | mant_mask[unpacked->precision];\r | |
601 | \r | |
602 | if (sign != (unpacked->mantissa < 0)) /* mantissa overflow? */\r | |
603 | lsrx (unpacked, 1); /* correct by shifting */\r | |
604 | else\r | |
605 | normalize (unpacked); /* renorm may be needed */\r | |
606 | \r | |
607 | if (unpacked->mantissa == 0) { /* result zero? */\r | |
608 | unpacked->mantissa = 0; /* return zero */\r | |
609 | unpacked->exponent = 0;\r | |
610 | overflow = 0; /* with overflow clear */\r | |
611 | }\r | |
612 | else if (unpacked->exponent < /* result underflow? */\r | |
613 | (FP_MAXNEXP >> (expand ? 0 : 2))) {\r | |
614 | unpacked->mantissa = 0; /* return zero */\r | |
615 | unpacked->exponent = 0;\r | |
616 | overflow = 1; /* and set overflow */\r | |
617 | }\r | |
618 | else if (unpacked->exponent > /* result overflow? */\r | |
619 | (FP_MAXPEXP >> (expand ? 0 : 2))) {\r | |
620 | if (sign && /* negative value? */\r | |
621 | (unpacked->precision == fp_x) && /* extended precision? */\r | |
622 | (UNIT_CPU_MODEL != UNIT_1000_F)) { /* not F-series? */\r | |
623 | unpacked->mantissa = FP_MAXNMANT; /* return negative infinity */\r | |
624 | unpacked->exponent = FP_MAXPEXP & FP_M_EXP;\r | |
625 | }\r | |
626 | else {\r | |
627 | unpacked->mantissa = FP_MAXPMANT; /* return positive infinity */\r | |
628 | unpacked->exponent = FP_MAXPEXP & FP_M_EXP;\r | |
629 | }\r | |
630 | overflow = 1; /* and set overflow */\r | |
631 | }\r | |
632 | else\r | |
633 | overflow = 0; /* value is in range */\r | |
634 | \r | |
635 | return overflow;\r | |
636 | }\r | |
637 | \r | |
638 | \r | |
639 | /* Normalize, round, and pack an unpacked floating-point number. */\r | |
640 | \r | |
641 | static uint32 nrpack (OP *packed, FPU unpacked, t_bool expand)\r | |
642 | {\r | |
643 | uint32 overflow;\r | |
644 | \r | |
645 | normalize (&unpacked); /* normalize for rounding */\r | |
646 | overflow = roundovf (&unpacked, expand); /* round and check for overflow */\r | |
647 | *packed = pack (unpacked); /* pack result */\r | |
648 | \r | |
649 | return overflow;\r | |
650 | }\r | |
651 | \r | |
652 | \r | |
653 | \r | |
654 | /* Low-level arithmetic routines. */\r | |
655 | \r | |
656 | \r | |
657 | /* Complement an unpacked number. */\r | |
658 | \r | |
659 | static void complement (FPU *result)\r | |
660 | {\r | |
661 | if (result->mantissa == FP_MAXNMANT) { /* maximum negative? */\r | |
662 | result->mantissa = FP_ONEHALF; /* complement of -1.0 * 2 ^ n */\r | |
663 | result->exponent = result->exponent + 1; /* is 0.5 * 2 ^ (n + 1) */\r | |
664 | }\r | |
665 | else\r | |
666 | result->mantissa = -result->mantissa; /* negate mantissa */\r | |
667 | return;\r | |
668 | }\r | |
669 | \r | |
670 | \r | |
671 | /* Add two unpacked numbers.\r | |
672 | \r | |
673 | The mantissas are first aligned if necessary by scaling the smaller of the\r | |
674 | two operands. If the magnitude of the difference between the exponents is\r | |
675 | greater than the number of significant bits, then the smaller number has been\r | |
676 | scaled to zero (swamped), and so the sum is simply the larger operand.\r | |
677 | Otherwise, the sum is computed and checked for overflow, which has occured if\r | |
678 | the signs of the operands are the same but differ from that of the result.\r | |
679 | Scaling and renormalization is perfomed if overflow occurred.\r | |
680 | */\r | |
681 | \r | |
682 | static void add (FPU *sum, FPU augend, FPU addend)\r | |
683 | {\r | |
684 | int32 magn;\r | |
685 | t_bool bits_lost;\r | |
686 | \r | |
687 | if (augend.mantissa == 0)\r | |
688 | *sum = addend; /* X + 0 = X */\r | |
689 | \r | |
690 | else if (addend.mantissa == 0)\r | |
691 | *sum = augend; /* 0 + X = X */\r | |
692 | \r | |
693 | else {\r | |
694 | magn = augend.exponent - addend.exponent; /* difference exponents */\r | |
695 | \r | |
696 | if (magn > 0) { /* addend smaller? */\r | |
697 | *sum = augend; /* preset augend */\r | |
698 | bits_lost = asr (&addend, magn); /* align addend */\r | |
699 | }\r | |
700 | else { /* augend smaller? */\r | |
701 | *sum = addend; /* preset addend */\r | |
702 | magn = -magn; /* make difference positive */\r | |
703 | bits_lost = asr (&augend, magn); /* align augend */\r | |
704 | }\r | |
705 | \r | |
706 | if (magn <= (int32) op_bits[augend.precision]) { /* value swamped? */\r | |
707 | sum->mantissa = /* no, add mantissas */\r | |
708 | addend.mantissa + augend.mantissa;\r | |
709 | \r | |
710 | if (((addend.mantissa < 0) == (augend.mantissa < 0)) && /* mantissa overflow? */\r | |
711 | ((addend.mantissa < 0) != (sum->mantissa < 0))) {\r | |
712 | bits_lost = bits_lost | lsrx (sum, 1); /* restore value */\r | |
713 | sum->mantissa = /* restore sign */\r | |
714 | sum-> mantissa | (addend.mantissa & FP_MSIGN);\r | |
715 | }\r | |
716 | \r | |
717 | if (bits_lost) /* any bits lost? */\r | |
718 | sum->mantissa = sum->mantissa | 1; /* include one for rounding */\r | |
719 | }\r | |
720 | }\r | |
721 | return;\r | |
722 | }\r | |
723 | \r | |
724 | \r | |
725 | /* Multiply two unpacked numbers.\r | |
726 | \r | |
727 | The single-precision firmware (FMP) operates differently from the firmware\r | |
728 | extended-precision (.XMPY) and the hardware multiplies of any precision.\r | |
729 | Firmware implementations form 16-bit x 16-bit = 32-bit partial products and\r | |
730 | sum them to form the result. The hardware uses a series of shifts and adds.\r | |
731 | This means that firmware FMP and hardware FMP return slightly different\r | |
732 | values, as may be seen by attempting to run the firmware FMP diagnostic on\r | |
733 | the FPP.\r | |
734 | \r | |
735 | The FMP microcode calls a signed multiply routine to calculate three partial\r | |
736 | products (all but LSB * LSB). Because the LSBs are unsigned, i.e., all bits\r | |
737 | significant, the two MSB * LSB products are calculated using LSB/2. The\r | |
738 | unsigned right-shift ensures a positive LSB with no significant bits lost,\r | |
739 | because the lower eight bits are unused (they held the vacated exponent). In\r | |
740 | order to sum the partial products, the LSB of the result of MSB * MSB is also\r | |
741 | right-shifted before addition. Note, though, that this loses a significant\r | |
742 | bit. After summation, the result is left-shifted to correct for the original\r | |
743 | right shifts.\r | |
744 | \r | |
745 | The .XMPY microcode negates both operands as necessary to produce positive\r | |
746 | values and then forms six of the nine 16-bit x 16-bit = 32-bit unsigned\r | |
747 | multiplications required for a full 96-bit product. Given a 48-bit\r | |
748 | multiplicand "a1a2a3" and a 48-bit multiplier "b1b2b3", the firmware performs\r | |
749 | these calculations to develop a 48-bit product:\r | |
750 | \r | |
751 | a1 a2 a3\r | |
752 | +-------+-------+-------+\r | |
753 | b1 b2 b3\r | |
754 | +-------+-------+-------+\r | |
755 | _________________________\r | |
756 | \r | |
757 | a1 * b3 [p1]\r | |
758 | +-------+-------+\r | |
759 | a2 * b2 [p2]\r | |
760 | +-------+-------+\r | |
761 | a1 * b2 [p3]\r | |
762 | +-------+-------+\r | |
763 | a3 * b1 [p4]\r | |
764 | +-------+-------+\r | |
765 | a2 * b1 [p5]\r | |
766 | +-------+-------+\r | |
767 | a1 * b1 [p6]\r | |
768 | +-------+-------+\r | |
769 | _________________________________\r | |
770 | \r | |
771 | product\r | |
772 | +-------+-------+-------+\r | |
773 | \r | |
774 | The least-significant words of partial products [p1], [p2], and [p4] are used\r | |
775 | only to develop a carry bit into the 48-bit sum. The product is complemented\r | |
776 | as necessary to restore the sign.\r | |
777 | \r | |
778 | The basic FPP hardware algorithm scans the multiplier and adds a shifted copy\r | |
779 | of the multiplicand whenever a one-bit is detected. To avoid successive adds\r | |
780 | when a string of ones is encountered (because adds are more expensive than\r | |
781 | shifts), the hardware instead adds the multiplicand shifted by N+1+P and\r | |
782 | subtracts the multiplicand shifted by P to obtain the equivalent value with a\r | |
783 | maximum of two operations.\r | |
784 | \r | |
785 | Instead of implementing either the .XMPY firmware algorithm or the hardware\r | |
786 | shift-and-add algorithm directly, it is more efficient under simulation to\r | |
787 | use 32 x 32 = 64-bit multiplications, thereby reducing the number required\r | |
788 | from six to four (64-bit "c1c2" x 64-bit "d1d2"):\r | |
789 | \r | |
790 | ah al\r | |
791 | +-------+-------+\r | |
792 | bh bl\r | |
793 | +-------+-------+\r | |
794 | _________________\r | |
795 | \r | |
796 | al * bl [ll]\r | |
797 | +-------+-------+\r | |
798 | ah * bl [hl]\r | |
799 | +-------+-------+\r | |
800 | al * bh [lh]\r | |
801 | +-------+-------+\r | |
802 | ah * bh [hh]\r | |
803 | +-------+-------+\r | |
804 | _________________________________\r | |
805 | \r | |
806 | product\r | |
807 | +-------+-------+\r | |
808 | \r | |
809 | However, the FMP algorithm is implemented directly from the microcode to\r | |
810 | preserve the fidelity of the simulation, i.e., to lose the same amount\r | |
811 | of precision.\r | |
812 | */\r | |
813 | \r | |
814 | static void multiply (FPU *product, FPU multiplicand, FPU multiplier)\r | |
815 | {\r | |
816 | uint32 ah, al, bh, bl, sign = 0;\r | |
817 | t_uint64 hh, hl, lh, ll, carry;\r | |
818 | int16 ch, cl, dh, dl;\r | |
819 | t_bool firmware;\r | |
820 | \r | |
821 | product->precision = multiplicand.precision; /* set precision */\r | |
822 | \r | |
823 | if ((multiplicand.mantissa == 0) || /* 0 * X = 0 */\r | |
824 | (multiplier.mantissa == 0)) /* X * 0 = 0 */\r | |
825 | product->mantissa = product->exponent = 0;\r | |
826 | \r | |
827 | else {\r | |
828 | firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */\r | |
829 | \r | |
830 | if (!firmware || (product->precision != fp_f)) { /* hardware? */\r | |
831 | if (multiplicand.mantissa < 0) { /* negative? */\r | |
832 | complement (&multiplicand); /* complement operand */\r | |
833 | sign = ~sign; /* track sign */\r | |
834 | }\r | |
835 | if (multiplier.mantissa < 0) { /* negative? */\r | |
836 | complement (&multiplier); /* complement operand */\r | |
837 | sign = ~sign; /* track sign */\r | |
838 | }\r | |
839 | }\r | |
840 | \r | |
841 | product->exponent = /* compute exponent */\r | |
842 | multiplicand.exponent + multiplier.exponent + 1;\r | |
843 | \r | |
844 | ah = (uint32) (multiplicand.mantissa >> 32); /* split multiplicand */\r | |
845 | al = (uint32) (multiplicand.mantissa & DMASK32); /* into high and low parts */\r | |
846 | bh = (uint32) (multiplier.mantissa >> 32); /* split multiplier */\r | |
847 | bl = (uint32) (multiplier.mantissa & DMASK32); /* into high and low parts */\r | |
848 | \r | |
849 | if (firmware && (product->precision == fp_f)) { /* single-precision firmware? */\r | |
850 | ch = (int16) (ah >> 16) & DMASK; /* split 32-bit multiplicand */\r | |
851 | cl = (int16) (ah & 0xfffe); /* into high and low parts */\r | |
852 | dh = (int16) (bh >> 16) & DMASK; /* split 32-bit multiplier */\r | |
853 | dl = (int16) (bh & 0xfffe); /* into high and low parts */\r | |
854 | \r | |
855 | hh = (t_uint64) (((int32) ch * dh) & ~1); /* form cross products */\r | |
856 | hl = (t_uint64) (((t_int64) ch * (t_int64) (uint16) dl +\r | |
857 | (t_int64) dh * (t_int64) (uint16) cl) &\r | |
858 | 0xfffffffffffe0000);\r | |
859 | \r | |
860 | product->mantissa = (t_uint64) (((t_int64) hh << 32) + /* sum partials */\r | |
861 | ((t_int64) hl << 16));\r | |
862 | }\r | |
863 | \r | |
864 | else {\r | |
865 | hh = ((t_uint64) ah * bh); /* form four cross products */\r | |
866 | hl = ((t_uint64) ah * bl); /* using 32 x 32 = */\r | |
867 | lh = ((t_uint64) al * bh); /* 64-bit multiplies */\r | |
868 | ll = ((t_uint64) al * bl);\r | |
869 | \r | |
870 | carry = ((ll >> 32) + (uint32) hl + (uint32) lh) >> 32; /* form carry */\r | |
871 | \r | |
872 | product->mantissa = hh + (hl >> 32) + (lh >> 32) + carry; /* sum partials */\r | |
873 | \r | |
874 | if (sign) /* negate if required */\r | |
875 | complement (product);\r | |
876 | }\r | |
877 | }\r | |
878 | return;\r | |
879 | }\r | |
880 | \r | |
881 | \r | |
882 | /* Divide two unpacked numbers.\r | |
883 | \r | |
884 | As with multiply, the single-precision firmware (FDV) operates differently\r | |
885 | from the firmware extended-precision (.XDIV) and the hardware divisions of\r | |
886 | any precision. Firmware implementations utilize a "divide and correct"\r | |
887 | algorithm, wherein the quotient is estimated and then corrected by comparing\r | |
888 | the dividend to the product of the quotient and the divisor. The hardware\r | |
889 | uses a series of shifts and subtracts. This means that firmware FDV and\r | |
890 | hardware FDV once again return slightly different values.\r | |
891 | \r | |
892 | Under simulation, the classic divide-and-correct method is employed, using\r | |
893 | 64-bit / 32-bit = 32-bit divisions. This involves dividing the 64-bit\r | |
894 | dividend "a1a2a3a4" by the first 32-bit digit "b1b2" of the 64-bit divisor\r | |
895 | "b1b2b3b4". The resulting 32-bit quotient is ...\r | |
896 | \r | |
897 | The microcoded single-precision division avoids overflows by right-shifting\r | |
898 | some values, which leads to a loss of precision in the LSBs. We duplicate\r | |
899 | the firmware algorithm here to preserve the fidelity of the simulation.\r | |
900 | */\r | |
901 | \r | |
902 | static void divide (FPU *quotient, FPU dividend, FPU divisor)\r | |
903 | {\r | |
904 | uint32 sign = 0;\r | |
905 | t_int64 bh, bl, r1, r0, p1, p0;\r | |
906 | t_uint64 q, q1, q0;\r | |
907 | t_bool firmware;\r | |
908 | int32 ah, div, cp;\r | |
909 | int16 dh, dl, pq1, pq2, cq;\r | |
910 | \r | |
911 | quotient->precision = dividend.precision; /* set precision */\r | |
912 | \r | |
913 | if (divisor.mantissa == 0) { /* division by zero? */\r | |
914 | if (dividend.mantissa < 0)\r | |
915 | quotient->mantissa = FP_MSIGN; /* return minus infinity */\r | |
916 | else\r | |
917 | quotient->mantissa = ~FP_MSIGN; /* or plus infinity */\r | |
918 | quotient->exponent = FP_MAXPEXP + 1;\r | |
919 | }\r | |
920 | \r | |
921 | else if (dividend.mantissa == 0) /* dividend zero? */\r | |
922 | quotient->mantissa = quotient->exponent = 0; /* yes; result is zero */\r | |
923 | \r | |
924 | else {\r | |
925 | firmware = (UNIT_CPU_MODEL != UNIT_1000_F); /* set firmware flag */\r | |
926 | \r | |
927 | if (!firmware || (quotient->precision != fp_f)) { /* hardware or FFP? */\r | |
928 | if (dividend.mantissa < 0) { /* negative? */\r | |
929 | complement (÷nd); /* complement operand */\r | |
930 | sign = ~sign; /* track sign */\r | |
931 | }\r | |
932 | if (divisor.mantissa < 0) { /* negative? */\r | |
933 | complement (&divisor); /* complement operand */\r | |
934 | sign = ~sign; /* track sign */\r | |
935 | }\r | |
936 | }\r | |
937 | \r | |
938 | quotient->exponent = /* division subtracts exponents */\r | |
939 | dividend.exponent - divisor.exponent;\r | |
940 | \r | |
941 | bh = divisor.mantissa >> 32; /* split divisor */\r | |
942 | bl = divisor.mantissa & DMASK32; /* into high and low parts */\r | |
943 | \r | |
944 | if (firmware && (quotient->precision == fp_f)) { /* single-precision firmware? */\r | |
945 | quotient->exponent = quotient->exponent + 1; /* fix exponent */\r | |
946 | \r | |
947 | ah = (int32) (dividend.mantissa >> 32); /* split dividend */\r | |
948 | dh = (int16) (bh >> 16); /* split divisor again */\r | |
949 | dl = (int16) bh;\r | |
950 | \r | |
951 | div = ah >> 2; /* ASR 2 to prevent overflow */\r | |
952 | \r | |
953 | pq1 = div / dh; /* form first partial quotient */\r | |
954 | div = ((div % dh) & ~1) << 15; /* ASR 1, move rem to upper */\r | |
955 | pq2 = div / dh; /* form second partial quotient */\r | |
956 | \r | |
957 | div = (uint16) dl << 13; /* move divisor LSB to upper, LSR 3 */\r | |
958 | cq = div / dh; /* form correction quotient */\r | |
959 | cp = -cq * pq1; /* and correction product */\r | |
960 | \r | |
961 | cp = (((cp >> 14) & ~3) + (int32) pq2) << 1; /* add corr prod and 2nd partial quo */\r | |
962 | quotient->mantissa = /* add 1st partial quo and align */\r | |
963 | (t_uint64) (((int32) pq1 << 16) + cp) << 32;\r | |
964 | }\r | |
965 | \r | |
966 | else { /* hardware or FFP */\r | |
967 | q1 = (t_uint64) (dividend.mantissa / bh); /* form 1st trial quotient */\r | |
968 | r1 = dividend.mantissa % bh; /* and remainder */\r | |
969 | p1 = (r1 << 24) - (bl >> 8) * q1; /* calculate correction */\r | |
970 | \r | |
971 | while (p1 < 0) { /* correction needed? */\r | |
972 | q1 = q1 - 1; /* trial quotient too large */\r | |
973 | p1 = p1 + (divisor.mantissa >> 8); /* increase remainder */\r | |
974 | }\r | |
975 | \r | |
976 | q0 = (t_uint64) ((p1 << 8) / bh); /* form 2nd trial quotient */\r | |
977 | r0 = (p1 << 8) % bh; /* and remainder */\r | |
978 | p0 = (r0 << 24) - (bl >> 8) * q0; /* calculate correction */\r | |
979 | \r | |
980 | while (p0 < 0) { /* correction needed? */\r | |
981 | q0 = q0 - 1; /* trial quotient too large */\r | |
982 | p0 = p0 + (divisor.mantissa >> 8); /* increase remainder */\r | |
983 | }\r | |
984 | \r | |
985 | q = (q1 << 32) + q0; /* sum quotient digits */\r | |
986 | \r | |
987 | if (q1 & 0xffffffff00000000) { /* did we lose MSB? */\r | |
988 | q = (q >> 1) | 0x8000000000000000; /* shift right and replace bit */\r | |
989 | quotient->exponent = quotient->exponent + 1;/* bump exponent for shift */\r | |
990 | }\r | |
991 | \r | |
992 | if (q & 0x8000000000000000) /* lose normalization? */\r | |
993 | q = q >> 1; /* correct */\r | |
994 | \r | |
995 | quotient->mantissa = (t_int64) q;\r | |
996 | }\r | |
997 | \r | |
998 | if (sign)\r | |
999 | complement (quotient); /* negate if required */\r | |
1000 | }\r | |
1001 | return;\r | |
1002 | }\r | |
1003 | \r | |
1004 | \r | |
1005 | /* Fix an unpacked number.\r | |
1006 | \r | |
1007 | A floating-point value is converted to an integer. The desired precision of\r | |
1008 | the result (single or double integer) must be set before calling.\r | |
1009 | \r | |
1010 | Values less than 0.5 (i.e., with negative exponents) underflow to zero. If\r | |
1011 | the value exceeds the specified integer range, the maximum integer value is\r | |
1012 | returned and overflow is set. Otherwise, the floating-point value is\r | |
1013 | right-shifted to zero the exponent. The result is then rounded.\r | |
1014 | */\r | |
1015 | \r | |
1016 | static uint32 fix (FPU *result, FPU operand)\r | |
1017 | {\r | |
1018 | uint32 overflow;\r | |
1019 | t_bool bits_lost;\r | |
1020 | \r | |
1021 | if (operand.exponent < 0) { /* value < 0.5? */\r | |
1022 | result->mantissa = 0; /* result rounds to zero */\r | |
1023 | overflow = 0; /* clear for underflow */\r | |
1024 | }\r | |
1025 | \r | |
1026 | else if (operand.exponent > /* value > integer size? */\r | |
1027 | (int32) op_bits[result->precision]) {\r | |
1028 | result->mantissa = /* return max int value */\r | |
1029 | (t_uint64) int_p_max[result->precision] <<\r | |
1030 | op_start[result->precision];\r | |
1031 | overflow = 1; /* and set overflow */\r | |
1032 | }\r | |
1033 | \r | |
1034 | else { /* value in range */\r | |
1035 | bits_lost = asr (&operand, /* shift to zero exponent */\r | |
1036 | op_bits[result->precision] - operand.exponent);\r | |
1037 | \r | |
1038 | if (operand.mantissa < 0) { /* value negative? */\r | |
1039 | if (bits_lost) /* bits lost? */\r | |
1040 | operand.mantissa = operand.mantissa | 1; /* include one for rounding */\r | |
1041 | \r | |
1042 | operand.mantissa = operand.mantissa + /* round result */\r | |
1043 | p_half_lsb[result->precision];\r | |
1044 | }\r | |
1045 | \r | |
1046 | result->mantissa = operand.mantissa & /* mask to precision */\r | |
1047 | op_mask[result->precision];\r | |
1048 | overflow = 0;\r | |
1049 | }\r | |
1050 | \r | |
1051 | result->exponent = 0; /* tidy up for integer value */\r | |
1052 | return overflow;\r | |
1053 | }\r | |
1054 | \r | |
1055 | \r | |
1056 | /* Float an integer to an unpacked number.\r | |
1057 | \r | |
1058 | An integer is converted to a floating-point value. The desired precision of\r | |
1059 | the result must be set before calling.\r | |
1060 | \r | |
1061 | Conversion is simply a matter of copying the integer value, setting an\r | |
1062 | exponent that reflects the right-aligned position of the bits, and\r | |
1063 | normalizing.\r | |
1064 | */\r | |
1065 | \r | |
1066 | static void ffloat (FPU *result, FPU operand)\r | |
1067 | {\r | |
1068 | result->mantissa = operand.mantissa; /* set value */\r | |
1069 | result->exponent = op_bits[operand.precision]; /* set exponent */\r | |
1070 | normalize (result); /* normalize */\r | |
1071 | return;\r | |
1072 | }\r | |
1073 | \r | |
1074 | \r | |
1075 | \r | |
1076 | /* High-level floating-point routines. */\r | |
1077 | \r | |
1078 | \r | |
1079 | /* Determine operand precisions.\r | |
1080 | \r | |
1081 | The precisions of the operands and result are determined by decoding an\r | |
1082 | operation opcode and returned to the caller. Pass NULL for both of the\r | |
1083 | operands if only the result precision is wanted. Pass NULL for the result if\r | |
1084 | only the operand precisions are wanted.\r | |
1085 | */\r | |
1086 | \r | |
1087 | void fp_prec (uint16 opcode, OPSIZE *operand_l, OPSIZE *operand_r, OPSIZE *result)\r | |
1088 | {\r | |
1089 | OPSIZE fp_size, int_size;\r | |
1090 | \r | |
1091 | fp_size = (OPSIZE) ((opcode & 0003) + 2); /* fp_f, fp_x, fp_t, fp_e */\r | |
1092 | int_size = (OPSIZE) ((opcode & 0004) >> 2); /* in_s, in_d */\r | |
1093 | \r | |
1094 | if (operand_l && operand_r) { /* want operand precisions? */\r | |
1095 | switch (opcode & 0120) { /* mask out opcode bit 5 */\r | |
1096 | case 0000: /* add/mpy */\r | |
1097 | case 0020: /* sub/div */\r | |
1098 | *operand_l = fp_size; /* assume first op is fp */\r | |
1099 | \r | |
1100 | if (opcode & 0004) /* operand internal? */\r | |
1101 | *operand_r = fp_a; /* second op is accum */\r | |
1102 | else\r | |
1103 | *operand_r = fp_size; /* second op is fp */\r | |
1104 | break;\r | |
1105 | \r | |
1106 | case 0100: /* fix/accum as integer */\r | |
1107 | *operand_l = fp_size; /* first op is fp */\r | |
1108 | *operand_r = fp_a; /* second op is always null */\r | |
1109 | break;\r | |
1110 | \r | |
1111 | case 0120: /* flt/accum as float */\r | |
1112 | *operand_l = int_size; /* first op is integer */\r | |
1113 | *operand_r = fp_a; /* second op is always null */\r | |
1114 | break;\r | |
1115 | }\r | |
1116 | \r | |
1117 | if (opcode & 0010) /* operand internal? */\r | |
1118 | *operand_l = fp_a; /* first op is accum */\r | |
1119 | }\r | |
1120 | \r | |
1121 | if (result) /* want result precision? */\r | |
1122 | if ((opcode & 0120) == 0100) /* fix? */\r | |
1123 | *result = int_size; /* result is integer */\r | |
1124 | else /* all others */\r | |
1125 | *result = fp_size; /* result is fp */\r | |
1126 | \r | |
1127 | return;\r | |
1128 | }\r | |
1129 | \r | |
1130 | \r | |
1131 | /* Floating Point Processor executor.\r | |
1132 | \r | |
1133 | The executor simulates the MPP interface between the CPU and the FPP. The\r | |
1134 | operation to be performed is specified by the supplied opcode, which conforms\r | |
1135 | to the FPP hardware interface, as follows:\r | |
1136 | \r | |
1137 | Bits Value Action\r | |
1138 | ---- ----- ----------------------------------------------\r | |
1139 | 7 0 Exponent range is standard (+/-127)\r | |
1140 | 1 Exponent range is expanded (+/-511)\r | |
1141 | \r | |
1142 | 6-4 000 Add\r | |
1143 | 001 Subtract\r | |
1144 | 010 Multiply\r | |
1145 | 011 Divide\r | |
1146 | 100 Fix\r | |
1147 | 101 Float\r | |
1148 | 110 (diagnostic)\r | |
1149 | 111 (diagnostic)\r | |
1150 | \r | |
1151 | 3 0 Left operand is supplied\r | |
1152 | 1 Left operand in accumulator\r | |
1153 | \r | |
1154 | 2 0 Right operand is supplied (ADD/SUB/MPY/DIV)\r | |
1155 | Single integer operation (FIX/FLT)\r | |
1156 | 1 Right operand in accumulator (ADD/SUB/MPY/DIV)\r | |
1157 | Double integer operation (FIX/FLT)\r | |
1158 | \r | |
1159 | 1-0 00 2-word operation\r | |
1160 | 01 3-word operation\r | |
1161 | 10 4-word operation\r | |
1162 | 11 5-word operation\r | |
1163 | \r | |
1164 | If the opcode specifies that the left (or right) operand is in the\r | |
1165 | accumulator, then the value supplied for that parameter is not used. All\r | |
1166 | results are automatically left in the accumulator. If the result is not\r | |
1167 | needed externally, then NULL may be passed for the result parameter.\r | |
1168 | \r | |
1169 | To support accumulator set/get operations under simulation, the opcode is\r | |
1170 | expanded to include a special mode, indicated by bit 15 = 1. In this mode,\r | |
1171 | if the result parameter is NULL, then the accumulator is set from the value\r | |
1172 | passed as operand_l. If the result parameter is not null, then the\r | |
1173 | accumulator value is returned as the result, and operand_l is ignored. The\r | |
1174 | precision of the operation is performed as specified by the OPSIZE value\r | |
1175 | passed in bits 2-0 of the opcode.\r | |
1176 | \r | |
1177 | The function returns 1 if the operation overflows and 0 if not.\r | |
1178 | */\r | |
1179 | \r | |
1180 | uint32 fp_exec (uint16 opcode, OP *result, OP operand_l, OP operand_r)\r | |
1181 | {\r | |
1182 | static FPU accumulator;\r | |
1183 | FPU uoperand_l, uoperand_r;\r | |
1184 | OPSIZE op_l_prec, op_r_prec, rslt_prec;\r | |
1185 | uint32 overflow;\r | |
1186 | \r | |
1187 | if (opcode & SIGN) { /* accumulator mode? */\r | |
1188 | rslt_prec = (OPSIZE) (opcode & 0017); /* get operation precision */\r | |
1189 | \r | |
1190 | if (result) { /* get accumulator? */\r | |
1191 | op_l_prec = accumulator.precision; /* save accum prec temp */\r | |
1192 | accumulator.precision = rslt_prec; /* set desired precision */\r | |
1193 | *result = pack (accumulator); /* pack accumulator */\r | |
1194 | accumulator.precision = op_l_prec; /* restore correct prec */\r | |
1195 | }\r | |
1196 | else /* set accumulator */\r | |
1197 | accumulator = unpack (operand_l, rslt_prec); /* unpack from operand */\r | |
1198 | \r | |
1199 | return 0; /* no overflow from accum ops */\r | |
1200 | }\r | |
1201 | \r | |
1202 | fp_prec (opcode, &op_l_prec, &op_r_prec, &rslt_prec); /* calc precs from opcode */\r | |
1203 | \r | |
1204 | if (op_l_prec == fp_a) /* left operand in accum? */\r | |
1205 | uoperand_l = accumulator; /* copy it */\r | |
1206 | else /* operand supplied */\r | |
1207 | uoperand_l = unpack (operand_l, op_l_prec); /* unpack from parameter */\r | |
1208 | \r | |
1209 | if (op_r_prec == fp_a) /* right operand in accum? */\r | |
1210 | uoperand_r = accumulator; /* copy it */\r | |
1211 | else /* operand supplied */\r | |
1212 | uoperand_r = unpack (operand_r, op_r_prec); /* unpack from parameter */\r | |
1213 | \r | |
1214 | \r | |
1215 | switch (opcode & 0160) { /* dispatch operation */\r | |
1216 | \r | |
1217 | case 0000: /* add */\r | |
1218 | add (&accumulator, uoperand_l, uoperand_r);\r | |
1219 | break;\r | |
1220 | \r | |
1221 | case 0020: /* subtract */\r | |
1222 | complement (&uoperand_r);\r | |
1223 | add (&accumulator, uoperand_l, uoperand_r);\r | |
1224 | break;\r | |
1225 | \r | |
1226 | case 0040: /* multiply */\r | |
1227 | multiply (&accumulator, uoperand_l, uoperand_r);\r | |
1228 | break;\r | |
1229 | \r | |
1230 | case 0060: /* divide */\r | |
1231 | divide (&accumulator, uoperand_l, uoperand_r);\r | |
1232 | break;\r | |
1233 | \r | |
1234 | case 0100: /* fix */\r | |
1235 | accumulator.precision = rslt_prec;\r | |
1236 | overflow = fix (&accumulator, uoperand_l);\r | |
1237 | \r | |
1238 | if (result) /* result wanted? */\r | |
1239 | *result = pack_int (accumulator.mantissa, /* pack integer */\r | |
1240 | rslt_prec);\r | |
1241 | return overflow;\r | |
1242 | \r | |
1243 | case 0120: /* float */\r | |
1244 | accumulator.precision = rslt_prec;\r | |
1245 | ffloat (&accumulator, uoperand_l);\r | |
1246 | \r | |
1247 | if (result) /* result wanted? */\r | |
1248 | *result = pack (accumulator); /* pack FP (FLT does not round) */\r | |
1249 | return 0;\r | |
1250 | \r | |
1251 | case 0140: /* (diagnostic) */\r | |
1252 | case 0160: /* (diagnostic) */\r | |
1253 | return 0;\r | |
1254 | }\r | |
1255 | \r | |
1256 | if (UNIT_CPU_MODEL != UNIT_1000_F) /* firmware implementation? */\r | |
1257 | accumulator.mantissa = accumulator.mantissa & /* mask to precision */\r | |
1258 | op_mask[accumulator.precision];\r | |
1259 | \r | |
1260 | normalize (&accumulator); /* normalize */\r | |
1261 | overflow = roundovf (&accumulator, opcode & 0200); /* round and check for overflow */\r | |
1262 | \r | |
1263 | if (result) /* result wanted? */\r | |
1264 | *result = pack (accumulator); /* pack result */\r | |
1265 | \r | |
1266 | return overflow;\r | |
1267 | }\r | |
1268 | \r | |
1269 | \r | |
1270 | /* Set or get accumulator at desired precision.\r | |
1271 | \r | |
1272 | This function provides access to the FPP accumulator. In hardware, the\r | |
1273 | accumulator may be read at a given precision by sending the FPP an opcode\r | |
1274 | encoded with the desired precision and then reading words from the FPP\r | |
1275 | /without/ initiating the operation, i.e., without starting the processor.\r | |
1276 | \r | |
1277 | Under simulation, pass this function a NULL operand and the desired\r | |
1278 | precision to read the accumulator. Pass a pointer to an operand and the\r | |
1279 | desired precision to set the accumulator; the return value in this case is\r | |
1280 | not defined.\r | |
1281 | */\r | |
1282 | \r | |
1283 | OP fp_accum (const OP *operand, OPSIZE precision)\r | |
1284 | {\r | |
1285 | OP result = NOP;\r | |
1286 | uint16 opcode = (uint16) precision | SIGN; /* add special mode bit */\r | |
1287 | \r | |
1288 | if (operand)\r | |
1289 | fp_exec (opcode, NULL, *operand, NOP); /* set accum */\r | |
1290 | else\r | |
1291 | fp_exec (opcode, &result, NOP, NOP); /* get accum */\r | |
1292 | return result;\r | |
1293 | }\r | |
1294 | \r | |
1295 | \r | |
1296 | /* Pack an unpacked floating-point number.\r | |
1297 | \r | |
1298 | An unpacked mantissa is passed as a "packed" number with an unused exponent.\r | |
1299 | The mantissa and separately-passed exponent are packed into the in-memory\r | |
1300 | floating-point format. Note that all bits are significant in the mantissa\r | |
1301 | (no masking is done).\r | |
1302 | */\r | |
1303 | \r | |
1304 | uint32 fp_pack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)\r | |
1305 | {\r | |
1306 | FPU unpacked;\r | |
1307 | \r | |
1308 | unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */\r | |
1309 | unpacked.exponent = exponent; /* set exponent */\r | |
1310 | unpacked.precision = precision; /* set precision */\r | |
1311 | *result = pack (unpacked); /* pack them */\r | |
1312 | return 0;\r | |
1313 | }\r | |
1314 | \r | |
1315 | \r | |
1316 | /* Normalize, round, and pack an unpacked floating-point number.\r | |
1317 | \r | |
1318 | An unpacked mantissa is passed as a "packed" number with an unused exponent.\r | |
1319 | The mantissa and separately-passed exponent are normalized, rounded, and\r | |
1320 | packed into the in-memory floating-point format. Note that all bits are\r | |
1321 | significant in the mantissa (no masking is done).\r | |
1322 | */\r | |
1323 | \r | |
1324 | uint32 fp_nrpack (OP *result, OP mantissa, int32 exponent, OPSIZE precision)\r | |
1325 | {\r | |
1326 | FPU unpacked;\r | |
1327 | \r | |
1328 | unpacked.mantissa = unpack_int (mantissa, precision); /* unpack mantissa */\r | |
1329 | unpacked.exponent = exponent; /* set exponent */\r | |
1330 | unpacked.precision = precision; /* set precision */\r | |
1331 | return nrpack (result, unpacked, FALSE); /* norm/rnd/pack them */\r | |
1332 | }\r | |
1333 | \r | |
1334 | \r | |
1335 | /* Unpack a packed floating-point number.\r | |
1336 | \r | |
1337 | A floating-point number, packed into the in-memory format, is unpacked into\r | |
1338 | separate mantissa and exponent values. The unpacked mantissa is returned in\r | |
1339 | a "packed" structure with an exponent of zero. Mantissa or exponent may be\r | |
1340 | null if that part isn't wanted.\r | |
1341 | */\r | |
1342 | \r | |
1343 | uint32 fp_unpack (OP *mantissa, int32 *exponent, OP packed, OPSIZE precision)\r | |
1344 | \r | |
1345 | {\r | |
1346 | FPU unpacked;\r | |
1347 | \r | |
1348 | unpacked = unpack (packed, precision); /* unpack mantissa and exponent */\r | |
1349 | \r | |
1350 | if (exponent) /* exponent wanted? */\r | |
1351 | *exponent = unpacked.exponent; /* return exponent */\r | |
1352 | \r | |
1353 | if (mantissa) /* mantissa wanted? */\r | |
1354 | *mantissa = pack_int (unpacked.mantissa, fp_t); /* return full-size mantissa */\r | |
1355 | return 0;\r | |
1356 | }\r | |
1357 | \r | |
1358 | \r | |
1359 | /* Complement an unpacked mantissa.\r | |
1360 | \r | |
1361 | An unpacked mantissa is passed as a "packed" number with a zero exponent.\r | |
1362 | The exponent increment, i.e., either zero or one, depending on whether a\r | |
1363 | renormalization was required, is returned. Note that all bits are\r | |
1364 | significant in the mantissa.\r | |
1365 | */\r | |
1366 | \r | |
1367 | uint32 fp_ucom (OP *mantissa, OPSIZE precision)\r | |
1368 | {\r | |
1369 | FPU unpacked;\r | |
1370 | \r | |
1371 | unpacked.mantissa = unpack_int (*mantissa, precision); /* unpack mantissa */\r | |
1372 | unpacked.exponent = 0; /* clear undefined exponent */\r | |
1373 | unpacked.precision = precision; /* set precision */\r | |
1374 | complement (&unpacked); /* negate it */\r | |
1375 | *mantissa = pack_int (unpacked.mantissa, precision); /* replace mantissa */\r | |
1376 | return (uint32) unpacked.exponent; /* return exponent increment */\r | |
1377 | }\r | |
1378 | \r | |
1379 | \r | |
1380 | /* Complement a floating-point number. */\r | |
1381 | \r | |
1382 | uint32 fp_pcom (OP *packed, OPSIZE precision)\r | |
1383 | {\r | |
1384 | FPU unpacked;\r | |
1385 | \r | |
1386 | unpacked = unpack (*packed, precision); /* unpack the number */\r | |
1387 | complement (&unpacked); /* negate it */\r | |
1388 | return nrpack (packed, unpacked, FALSE); /* and norm/rnd/pack */\r | |
1389 | }\r | |
1390 | \r | |
1391 | \r | |
1392 | /* Truncate a floating-point number. */\r | |
1393 | \r | |
1394 | uint32 fp_trun (OP *result, OP source, OPSIZE precision)\r | |
1395 | {\r | |
1396 | t_bool bits_lost;\r | |
1397 | FPU unpacked;\r | |
1398 | FPU one = { FP_ONEHALF, 1, 0 }; /* 0.5 * 2 ** 1 = 1.0 */\r | |
1399 | OP zero = { { 0, 0, 0, 0, 0 } }; /* 0.0 */\r | |
1400 | t_uint64 mask = mant_mask[precision] & ~FP_MSIGN;\r | |
1401 | \r | |
1402 | unpacked = unpack (source, precision);\r | |
1403 | if (unpacked.exponent < 0) /* number < 0.5? */\r | |
1404 | *result = zero; /* return 0 */\r | |
1405 | else if (unpacked.exponent >= (int32) op_bits[precision]) /* no fractional bits? */\r | |
1406 | *result = source; /* already integer */\r | |
1407 | else {\r | |
1408 | mask = (mask >> unpacked.exponent) & mask; /* mask fractional bits */\r | |
1409 | bits_lost = ((unpacked.mantissa & mask) != 0); /* flag if bits lost */\r | |
1410 | unpacked.mantissa = unpacked.mantissa & ~mask; /* mask off fraction */\r | |
1411 | if ((unpacked.mantissa < 0) && bits_lost) /* negative? */\r | |
1412 | add (&unpacked, unpacked, one); /* truncate toward zero */\r | |
1413 | nrpack (result, unpacked, FALSE); /* (overflow cannot occur) */\r | |
1414 | }\r | |
1415 | return 0; /* clear overflow on return */\r | |
1416 | }\r | |
1417 | \r | |
1418 | \r | |
1419 | /* Convert a floating-point number from one precision to another. */\r | |
1420 | \r | |
1421 | uint32 fp_cvt (OP *result, OPSIZE source_precision, OPSIZE dest_precision)\r | |
1422 | {\r | |
1423 | FPU unpacked;\r | |
1424 | \r | |
1425 | unpacked = unpack (*result, source_precision);\r | |
1426 | unpacked.precision = dest_precision;\r | |
1427 | return nrpack (result, unpacked, FALSE); /* norm/rnd/pack */\r | |
1428 | }\r | |
1429 | \r | |
1430 | \r | |
1431 | #endif /* end of int64 support */\r |