Commit | Line | Data |
---|---|---|
196ba1fc PH |
1 | /* pdp10_mdfp.c: PDP-10 multiply/divide and floating point simulator\r |
2 | \r | |
3 | Copyright (c) 1993-2005, Robert M Supnik\r | |
4 | \r | |
5 | Permission is hereby granted, free of charge, to any person obtaining a\r | |
6 | copy of this software and associated documentation files (the "Software"),\r | |
7 | to deal in the Software without restriction, including without limitation\r | |
8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,\r | |
9 | and/or sell copies of the Software, and to permit persons to whom the\r | |
10 | Software is furnished to do so, subject to the following conditions:\r | |
11 | \r | |
12 | The above copyright notice and this permission notice shall be included in\r | |
13 | all copies or substantial portions of the Software.\r | |
14 | \r | |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\r | |
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\r | |
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\r | |
18 | ROBERT M SUPNIK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\r | |
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r | |
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r | |
21 | \r | |
22 | Except as contained in this notice, the name of Robert M Supnik shall not be\r | |
23 | used in advertising or otherwise to promote the sale, use or other dealings\r | |
24 | in this Software without prior written authorization from Robert M Supnik.\r | |
25 | \r | |
26 | 2-Apr-04 RMS Fixed bug in floating point unpack\r | |
27 | Fixed bug in FIXR (found by Phil Stone, fixed by\r | |
28 | Chris Smith)\r | |
29 | 31-Aug-01 RMS Changed int64 to t_int64 for Windoze\r | |
30 | 10-Aug-01 RMS Removed register in declarations\r | |
31 | \r | |
32 | Instructions handled in this module:\r | |
33 | imul integer multiply\r | |
34 | idiv integer divide\r | |
35 | mul multiply\r | |
36 | div divide\r | |
37 | dmul double precision multiply\r | |
38 | ddiv double precision divide\r | |
39 | fad(r) floating add (and round)\r | |
40 | fsb(r) floating subtract (and round)\r | |
41 | fmp(r) floating multiply (and round)\r | |
42 | fdv(r) floating divide and round\r | |
43 | fsc floating scale\r | |
44 | fix(r) floating to fixed (and round)\r | |
45 | fltr fixed to floating and round\r | |
46 | dfad double precision floating add/subtract\r | |
47 | dfmp double precision floating multiply\r | |
48 | dfdv double precision floating divide\r | |
49 | \r | |
50 | The PDP-10 stores double (quad) precision integers in sequential\r | |
51 | AC's or memory locations. Integers are stored in 2's complement\r | |
52 | form. Only the sign of the high order word matters; the signs\r | |
53 | in low order words are ignored on input and set to the sign of\r | |
54 | the result on output. Quad precision integers exist only in the\r | |
55 | AC's as the result of a DMUL or the dividend of a DDIV.\r | |
56 | \r | |
57 | 0 00000000011111111112222222222333333\r | |
58 | 0 12345678901234567890123456789012345\r | |
59 | +-+-----------------------------------+\r | |
60 | |S| high order integer | AC(n), A\r | |
61 | +-+-----------------------------------+\r | |
62 | |S| low order integer | AC(n + 1), A + 1\r | |
63 | +-+-----------------------------------+\r | |
64 | |S| low order integer | AC(n + 2)\r | |
65 | +-+-----------------------------------+\r | |
66 | |S| low order integer | AC(n + 3)\r | |
67 | +-+-----------------------------------+\r | |
68 | \r | |
69 | The PDP-10 supports two floating point formats: single and double\r | |
70 | precision. In both, the exponent is 8 bits, stored in excess\r | |
71 | 128 notation. The fraction is expected to be normalized. A\r | |
72 | single precision floating point number has 27 bits of fraction;\r | |
73 | a double precision number has 62 bits of fraction (the sign\r | |
74 | bit of the second word is ignored and is set to zero).\r | |
75 | \r | |
76 | In a negative floating point number, the exponent is stored in\r | |
77 | one's complement form, the fraction in two's complement form.\r | |
78 | \r | |
79 | 0 00000000 011111111112222222222333333\r | |
80 | 0 12345678 901234567890123456789012345\r | |
81 | +-+--------+---------------------------+\r | |
82 | |S|exponent| high order fraction | AC(n), A\r | |
83 | +-+--------+---------------------------+\r | |
84 | |0| low order fraction | AC(n + 1), A + 1\r | |
85 | +-+------------------------------------+\r | |
86 | \r | |
87 | Note that treatment of the sign is different for double precision\r | |
88 | integers and double precision floating point. DMOVN (implemented\r | |
89 | as an inline macro) follows floating point conventions.\r | |
90 | \r | |
91 | The original PDP-10 CPU (KA10) used a different format for double\r | |
92 | precision numbers and included certain instructions to make\r | |
93 | software support easier. These instructions were phased out in\r | |
94 | the KL10 and KS10 and are treated as MUUO's.\r | |
95 | \r | |
96 | The KL10 added extended precision (11-bit exponent) floating point\r | |
97 | format (so-called G floating). These instructions were not\r | |
98 | implemented in the KS10 and are treated as MUUO's.\r | |
99 | */\r | |
100 | \r | |
101 | #include "pdp10_defs.h"\r | |
102 | #include <setjmp.h>\r | |
103 | \r | |
104 | typedef struct { /* unpacked fp number */\r | |
105 | int32 sign; /* sign */\r | |
106 | int32 exp; /* exponent */\r | |
107 | t_uint64 fhi; /* fraction high */\r | |
108 | t_uint64 flo; /* for double prec */\r | |
109 | } UFP;\r | |
110 | \r | |
111 | #define MSK32 0xFFFFFFFF\r | |
112 | #define FIT27 (DMASK - 0x07FFFFFF)\r | |
113 | #define FIT32 (DMASK - MSK32)\r | |
114 | #define SFRC TRUE /* frac 2's comp */\r | |
115 | #define AFRC FALSE /* frac abs value */\r | |
116 | \r | |
117 | /* In packed floating point number */\r | |
118 | \r | |
119 | #define FP_BIAS 0200 /* exponent bias */\r | |
120 | #define FP_N_FHI 27 /* # of hi frac bits */\r | |
121 | #define FP_V_FHI 0 /* must be zero */\r | |
122 | #define FP_M_FHI 0000777777777\r | |
123 | #define FP_N_EXP 8 /* # of exp bits */\r | |
124 | #define FP_V_EXP (FP_V_FHI + FP_N_FHI)\r | |
125 | #define FP_M_EXP 0377\r | |
126 | #define FP_V_SIGN (FP_V_EXP + FP_N_EXP) /* sign */\r | |
127 | #define FP_N_FLO 35 /* # of lo frac bits */\r | |
128 | #define FP_V_FLO 0 /* must be zero */\r | |
129 | #define FP_M_FLO 0377777777777\r | |
130 | #define GET_FPSIGN(x) ((int32) (((x) >> FP_V_SIGN) & 1))\r | |
131 | #define GET_FPEXP(x) ((int32) (((x) >> FP_V_EXP) & FP_M_EXP))\r | |
132 | #define GET_FPHI(x) ((x) & FP_M_FHI)\r | |
133 | #define GET_FPLO(x) ((x) & FP_M_FLO)\r | |
134 | \r | |
135 | /* In unpacked floating point number */\r | |
136 | \r | |
137 | #define FP_N_GUARD 1 /* # of guard bits */\r | |
138 | #define FP_V_UFLO FP_N_GUARD /* <35:1> */\r | |
139 | #define FP_V_URNDD (FP_V_UFLO - 1) /* dp round bit */\r | |
140 | #define FP_V_UFHI (FP_V_UFLO + FP_N_FLO) /* <62:36> */\r | |
141 | #define FP_V_URNDS (FP_V_UFHI - 1) /* sp round bit */\r | |
142 | #define FP_V_UCRY (FP_V_UFHI + FP_N_FHI) /* <63> */\r | |
143 | #define FP_V_UNORM (FP_V_UCRY - 1) /* normalized bit */\r | |
144 | #define FP_UFHI 0x7FFFFFF000000000\r | |
145 | #define FP_UFLO 0x0000000FFFFFFFFE\r | |
146 | #define FP_UFRAC 0x7FFFFFFFFFFFFFFE\r | |
147 | #define FP_URNDD 0x0000000000000001\r | |
148 | #define FP_URNDS 0x0000000800000000\r | |
149 | #define FP_UNORM 0x4000000000000000\r | |
150 | #define FP_UCRY 0x8000000000000000\r | |
151 | #define FP_ONES 0xFFFFFFFFFFFFFFFF\r | |
152 | \r | |
153 | #define UNEG(x) ((~x) + 1)\r | |
154 | #define DUNEG(x) x.flo = UNEG (x.flo); x.fhi = ~x.fhi + (x.flo == 0)\r | |
155 | \r | |
156 | extern d10 *ac_cur; /* current AC block */\r | |
157 | extern int32 flags; /* flags */\r | |
158 | void mul (d10 a, d10 b, d10 *rs);\r | |
159 | void funpack (d10 h, d10 l, UFP *r, t_bool sgn);\r | |
160 | void fnorm (UFP *r, t_int64 rnd);\r | |
161 | d10 fpack (UFP *r, d10 *lo, t_bool fdvneg);\r | |
162 | \r | |
163 | /* Integer multiply - checked against KS-10 ucode */\r | |
164 | \r | |
165 | d10 imul (d10 a, d10 b)\r | |
166 | {\r | |
167 | d10 rs[2];\r | |
168 | \r | |
169 | if ((a == SIGN) && (b == SIGN)) { /* KS10 hack */\r | |
170 | SETF (F_AOV | F_T1); /* -2**35 squared */\r | |
171 | return SIGN;\r | |
172 | }\r | |
173 | mul (a, b, rs); /* mpy, dprec result */\r | |
174 | if (rs[0] && (rs[0] != ONES)) { /* high not all sign? */\r | |
175 | rs[1] = TSTS (a ^ b)? SETS (rs[1]): CLRS (rs[1]); /* set sign */\r | |
176 | SETF (F_AOV | F_T1); /* overflow */\r | |
177 | }\r | |
178 | return rs[1];\r | |
179 | }\r | |
180 | \r | |
181 | /* Integer divide, return quotient, remainder - checked against KS10 ucode\r | |
182 | The KS10 does not recognize -2^35/-1 as an error. Instead, it produces\r | |
183 | 2^35 (that is, -2^35) as the incorrect result.\r | |
184 | */\r | |
185 | \r | |
186 | t_bool idiv (d10 a, d10 b, d10 *rs)\r | |
187 | {\r | |
188 | d10 dvd = ABS (a); /* make ops positive */\r | |
189 | d10 dvr = ABS (b);\r | |
190 | \r | |
191 | if (dvr == 0) { /* divide by 0? */\r | |
192 | SETF (F_DCK | F_AOV | F_T1); /* set flags, return */\r | |
193 | return FALSE;\r | |
194 | }\r | |
195 | rs[0] = dvd / dvr; /* get quotient */\r | |
196 | rs[1] = dvd % dvr; /* get remainder */\r | |
197 | if (TSTS (a ^ b)) rs[0] = NEG (rs[0]); /* sign of result */\r | |
198 | if (TSTS (a)) rs[1] = NEG (rs[1]); /* sign of remainder */\r | |
199 | return TRUE;\r | |
200 | }\r | |
201 | \r | |
202 | /* Multiply, return double precision result - checked against KS10 ucode */\r | |
203 | \r | |
204 | void mul (d10 s1, d10 s2, d10 *rs)\r | |
205 | {\r | |
206 | t_uint64 a = ABS (s1);\r | |
207 | t_uint64 b = ABS (s2);\r | |
208 | t_uint64 t, u, r;\r | |
209 | \r | |
210 | if ((a == 0) || (b == 0)) { /* operand = 0? */\r | |
211 | rs[0] = rs[1] = 0; /* result 0 */\r | |
212 | return;\r | |
213 | }\r | |
214 | if ((a & FIT32) || (b & FIT32)) { /* fit in 64b? */\r | |
215 | t = a >> 18; /* no, split in half */\r | |
216 | a = a & RMASK; /* "dp" multiply */\r | |
217 | u = b >> 18;\r | |
218 | b = b & RMASK;\r | |
219 | r = (a * b) + (((a * u) + (b * t)) << 18); /* low is only 35b */\r | |
220 | rs[0] = ((t * u) << 1) + (r >> 35); /* so lsh hi 1 */\r | |
221 | rs[1] = r & MMASK;\r | |
222 | }\r | |
223 | else {\r | |
224 | r = a * b; /* fits, native mpy */\r | |
225 | rs[0] = r >> 35; /* split at bit 35 */\r | |
226 | rs[1] = r & MMASK;\r | |
227 | }\r | |
228 | \r | |
229 | if (TSTS (s1 ^ s2)) { MKDNEG (rs); } /* result -? */\r | |
230 | else if (TSTS (rs[0])) { /* result +, 2**70? */\r | |
231 | SETF (F_AOV | F_T1); /* overflow */\r | |
232 | rs[1] = SETS (rs[1]); /* consistent - */\r | |
233 | }\r | |
234 | return;\r | |
235 | }\r | |
236 | \r | |
237 | /* Divide, return quotient and remainder - checked against KS10 ucode\r | |
238 | Note that the initial divide check catches the case -2^70/-2^35;\r | |
239 | thus, the quotient can have at most 35 bits.\r | |
240 | */\r | |
241 | \r | |
242 | t_bool divi (int32 ac, d10 b, d10 *rs)\r | |
243 | {\r | |
244 | int32 p1 = ADDAC (ac, 1);\r | |
245 | d10 dvr = ABS (b); /* make divr positive */\r | |
246 | t_int64 t;\r | |
247 | int32 i;\r | |
248 | d10 dvd[2];\r | |
249 | \r | |
250 | dvd[0] = AC(ac); /* divd high */\r | |
251 | dvd[1] = CLRS (AC(p1)); /* divd lo, clr sgn */\r | |
252 | if (TSTS (AC(ac))) { DMOVN (dvd); } /* make divd positive */\r | |
253 | if (dvd[0] >= dvr) { /* divide fail? */\r | |
254 | SETF (F_AOV | F_DCK | F_T1); /* set flags, return */\r | |
255 | return FALSE;\r | |
256 | }\r | |
257 | if (dvd[0] & FIT27) { /* fit in 63b? */\r | |
258 | for (i = 0, rs[0] = 0; i < 35; i++) { /* 35 quotient bits */\r | |
259 | dvd[0] = (dvd[0] << 1) | ((dvd[1] >> 34) & 1);\r | |
260 | dvd[1] = (dvd[1] << 1) & MMASK; /* shift dividend */\r | |
261 | rs[0] = rs[0] << 1; /* shift quotient */\r | |
262 | if (dvd[0] >= dvr) { /* subtract work? */\r | |
263 | dvd[0] = dvd[0] - dvr; /* quo bit is 1 */\r | |
264 | rs[0] = rs[0] + 1;\r | |
265 | }\r | |
266 | }\r | |
267 | rs[1] = dvd[0]; /* store remainder */\r | |
268 | }\r | |
269 | else {\r | |
270 | t = (dvd[0] << 35) | dvd[1]; /* concatenate */\r | |
271 | rs[0] = t / dvr; /* quotient */\r | |
272 | rs[1] = t % dvr; /* remainder */\r | |
273 | }\r | |
274 | if (TSTS (AC(ac) ^ b)) rs[0] = NEG (rs[0]); /* sign of result */\r | |
275 | if (TSTS (AC(ac))) rs[1] = NEG (rs[1]); /* sign of remainder */\r | |
276 | return TRUE;\r | |
277 | }\r | |
278 | \r | |
279 | /* Double precision multiply. This is done the old fashioned way. Cross\r | |
280 | product multiplies would be a lot faster but would require more code.\r | |
281 | */\r | |
282 | \r | |
283 | void dmul (int32 ac, d10 *mpy)\r | |
284 | {\r | |
285 | int32 p1 = ADDAC (ac, 1);\r | |
286 | int32 p2 = ADDAC (ac, 2);\r | |
287 | int32 p3 = ADDAC (ac, 3);\r | |
288 | int32 i;\r | |
289 | d10 mpc[2], sign;\r | |
290 | \r | |
291 | mpc[0] = AC(ac); /* mplcnd hi */\r | |
292 | mpc[1] = CLRS (AC(p1)); /* mplcnd lo, clr sgn */\r | |
293 | sign = mpc[0] ^ mpy[0]; /* sign of result */\r | |
294 | if (TSTS (mpc[0])) { DMOVN (mpc); } /* get abs (mpcnd) */\r | |
295 | if (TSTS (mpy[0])) { DMOVN (mpy); } /* get abs (mpyer) */\r | |
296 | else mpy[1] = CLRS (mpy[1]); /* clear mpy lo sign */\r | |
297 | AC(ac) = AC(p1) = AC(p2) = AC(p3) = 0; /* clear AC's */\r | |
298 | if (((mpy[0] | mpy[1]) == 0) || ((mpc[0] | mpc[1]) == 0)) return;\r | |
299 | for (i = 0; i < 71; i++) { /* 71 mpyer bits */\r | |
300 | if (i) { /* shift res, mpy */\r | |
301 | AC(p3) = (AC(p3) >> 1) | ((AC(p2) & 1) << 34);\r | |
302 | AC(p2) = (AC(p2) >> 1) | ((AC(p1) & 1) << 34);\r | |
303 | AC(p1) = (AC(p1) >> 1) | ((AC(ac) & 1) << 34);\r | |
304 | AC(ac) = AC(ac) >> 1;\r | |
305 | mpy[1] = (mpy[1] >> 1) | ((mpy[0] & 1) << 34);\r | |
306 | mpy[0] = mpy[0] >> 1;\r | |
307 | }\r | |
308 | if (mpy[1] & 1) { /* if mpy lo bit = 1 */\r | |
309 | AC(p1) = AC(p1) + mpc[1];\r | |
310 | AC(ac) = AC(ac) + mpc[0] + (TSTS (AC(p1) != 0));\r | |
311 | AC(p1) = CLRS (AC(p1));\r | |
312 | }\r | |
313 | }\r | |
314 | if (TSTS (sign)) { /* result minus? */\r | |
315 | AC(p3) = (-AC(p3)) & MMASK; /* quad negate */\r | |
316 | AC(p2) = (~AC(p2) + (AC(p3) == 0)) & MMASK;\r | |
317 | AC(p1) = (~AC(p1) + (AC(p2) == 0)) & MMASK;\r | |
318 | AC(ac) = (~AC(ac) + (AC(p1) == 0)) & DMASK;\r | |
319 | }\r | |
320 | else if (TSTS (AC(ac))) SETF (F_AOV | F_T1); /* wrong sign */\r | |
321 | if (TSTS (AC(ac))) { /* if result - */\r | |
322 | AC(p1) = SETS (AC(p1)); /* make signs consistent */\r | |
323 | AC(p2) = SETS (AC(p2));\r | |
324 | AC(p3) = SETS (AC(p3));\r | |
325 | }\r | |
326 | return;\r | |
327 | }\r | |
328 | \r | |
329 | /* Double precision divide - checked against KS10 ucode */\r | |
330 | \r | |
331 | void ddiv (int32 ac, d10 *dvr)\r | |
332 | {\r | |
333 | int32 i, cryin;\r | |
334 | d10 sign, qu[2], dvd[4];\r | |
335 | \r | |
336 | dvd[0] = AC(ac); /* save dividend */\r | |
337 | for (i = 1; i < 4; i++) dvd[i] = CLRS (AC(ADDAC (ac, i)));\r | |
338 | sign = AC(ac) ^ dvr[0]; /* sign of result */\r | |
339 | if (TSTS (AC(ac))) { /* get abs (dividend) */\r | |
340 | for (i = 3, cryin = 1; i > 0; i--) { /* negate quad */\r | |
341 | dvd[i] = (~dvd[i] + cryin) & MMASK; /* comp + carry in */\r | |
342 | if (dvd[i]) cryin = 0; /* next carry in */\r | |
343 | }\r | |
344 | dvd[0] = (~dvd[0] + cryin) & DMASK;\r | |
345 | }\r | |
346 | if (TSTS (dvr[0])) { DMOVN (dvr); } /* get abs (divisor) */\r | |
347 | else dvr[1] = CLRS (dvr[1]);\r | |
348 | if (DCMPGE (dvd, dvr)) { /* will divide work? */\r | |
349 | SETF (F_AOV | F_DCK | F_T1); /* no, set flags */\r | |
350 | return;\r | |
351 | }\r | |
352 | qu[0] = qu[1] = 0; /* clear quotient */\r | |
353 | for (i = 0; i < 70; i++) { /* 70 quotient bits */\r | |
354 | dvd[0] = ((dvd[0] << 1) | ((dvd[1] >> 34) & 1)) & DMASK;;\r | |
355 | dvd[1] = ((dvd[1] << 1) | ((dvd[2] >> 34) & 1)) & MMASK;\r | |
356 | dvd[2] = ((dvd[2] << 1) | ((dvd[3] >> 34) & 1)) & MMASK;\r | |
357 | dvd[3] = (dvd[3] << 1) & MMASK; /* shift dividend */\r | |
358 | qu[0] = (qu[0] << 1) | ((qu[1] >> 34) & 1); /* shift quotient */\r | |
359 | qu[1] = (qu[1] << 1) & MMASK;\r | |
360 | if (DCMPGE (dvd, dvr)) { /* subtract work? */\r | |
361 | dvd[0] = dvd[0] - dvr[0] - (dvd[1] < dvr[1]);\r | |
362 | dvd[1] = (dvd[1] - dvr[1]) & MMASK; /* do subtract */\r | |
363 | qu[1] = qu[1] + 1; /* set quotient bit */\r | |
364 | }\r | |
365 | }\r | |
366 | if (TSTS (sign) && (qu[0] | qu[1])) { MKDNEG (qu); }\r | |
367 | if (TSTS (AC(ac)) && (dvd[0] | dvd[1])) { MKDNEG (dvd); }\r | |
368 | AC(ac) = qu[0]; /* quotient */\r | |
369 | AC(ADDAC(ac, 1)) = qu[1];\r | |
370 | AC(ADDAC(ac, 2)) = dvd[0]; /* remainder */\r | |
371 | AC(ADDAC(ac, 3)) = dvd[1];\r | |
372 | return;\r | |
373 | }\r | |
374 | \r | |
375 | /* Single precision floating add/subtract - checked against KS10 ucode\r | |
376 | The KS10 shifts the smaller operand regardless of the exponent diff.\r | |
377 | This code will not shift more than 63 places; shifts beyond that\r | |
378 | cannot change the value of the smaller operand.\r | |
379 | \r | |
380 | If the signs of the operands are the same, the result sign is the\r | |
381 | same as the source sign; the sign of the result fraction is actually\r | |
382 | part of the data. If the signs of the operands are different, the\r | |
383 | result sign is determined by the fraction sign.\r | |
384 | */\r | |
385 | \r | |
386 | d10 fad (d10 op1, d10 op2, t_bool rnd, int32 inv)\r | |
387 | {\r | |
388 | int32 ediff;\r | |
389 | UFP a, b, t;\r | |
390 | \r | |
391 | if (inv) op2 = NEG (op2); /* subtract? -b */\r | |
392 | if (op1 == 0) funpack (op2, 0, &a, AFRC); /* a = 0? result is b */\r | |
393 | else if (op2 == 0) funpack (op1, 0, &a, AFRC); /* b = 0? result is a */\r | |
394 | else {\r | |
395 | funpack (op1, 0, &a, SFRC); /* unpack operands */\r | |
396 | funpack (op2, 0, &b, SFRC); /* fracs are 2's comp */\r | |
397 | ediff = a.exp - b.exp; /* get exp diff */\r | |
398 | if (ediff < 0) { /* a < b? switch */\r | |
399 | t = a;\r | |
400 | a = b;\r | |
401 | b = t;\r | |
402 | ediff = -ediff;\r | |
403 | }\r | |
404 | if (ediff > 63) ediff = 63; /* cap diff at 63 */\r | |
405 | if (ediff) b.fhi = (t_int64) b.fhi >> ediff; /* shift b (signed) */\r | |
406 | a.fhi = a.fhi + b.fhi; /* add fractions */\r | |
407 | if (a.sign ^ b.sign) { /* add or subtract? */\r | |
408 | if (a.fhi & FP_UCRY) { /* subtract, frac -? */\r | |
409 | a.fhi = UNEG (a.fhi); /* complement result */\r | |
410 | a.sign = 1; /* result is - */\r | |
411 | }\r | |
412 | else a.sign = 0; /* result is + */\r | |
413 | }\r | |
414 | else {\r | |
415 | if (a.sign) a.fhi = UNEG (a.fhi); /* add, src -? comp */\r | |
416 | if (a.fhi & FP_UCRY) { /* check for carry */\r | |
417 | a.fhi = a.fhi >> 1; /* flo won't be used */\r | |
418 | a.exp = a.exp + 1;\r | |
419 | }\r | |
420 | }\r | |
421 | }\r | |
422 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r | |
423 | return fpack (&a, NULL, FALSE);\r | |
424 | }\r | |
425 | \r | |
426 | /* Single precision floating multiply. Because the fractions are 27b,\r | |
427 | a 64b multiply can be used for the fraction multiply. The 27b\r | |
428 | fractions are positioned 0'frac'0000, resulting in 00'hifrac'0..0.\r | |
429 | The extra 0 is accounted for by biasing the result exponent.\r | |
430 | */\r | |
431 | \r | |
432 | #define FP_V_SPM (FP_V_UFHI - (32 - FP_N_FHI - 1))\r | |
433 | d10 fmp (d10 op1, d10 op2, t_bool rnd)\r | |
434 | {\r | |
435 | UFP a, b;\r | |
436 | \r | |
437 | funpack (op1, 0, &a, AFRC); /* unpack operands */\r | |
438 | funpack (op2, 0, &b, AFRC); /* fracs are abs val */\r | |
439 | if ((a.fhi == 0) || (b.fhi == 0)) return 0; /* either 0? */\r | |
440 | a.sign = a.sign ^ b.sign; /* result sign */\r | |
441 | a.exp = a.exp + b.exp - FP_BIAS + 1; /* result exponent */\r | |
442 | a.fhi = (a.fhi >> FP_V_SPM) * (b.fhi >> FP_V_SPM); /* high 27b of result */\r | |
443 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r | |
444 | return fpack (&a, NULL, FALSE);\r | |
445 | }\r | |
446 | \r | |
447 | /* Single precision floating divide. Because the fractions are 27b, a\r | |
448 | 64b divide can be used for the fraction divide. Note that 28b-29b\r | |
449 | of fraction are developed; the code will do one special normalize to\r | |
450 | make sure that the 28th bit is not lost. Also note the special\r | |
451 | treatment of negative quotients with non-zero remainders; this\r | |
452 | implements the note on p2-23 of the Processor Reference Manual.\r | |
453 | */\r | |
454 | \r | |
455 | t_bool fdv (d10 op1, d10 op2, d10 *rs, t_bool rnd)\r | |
456 | {\r | |
457 | UFP a, b;\r | |
458 | t_uint64 savhi;\r | |
459 | t_bool rem = FALSE;\r | |
460 | \r | |
461 | funpack (op1, 0, &a, AFRC); /* unpack operands */\r | |
462 | funpack (op2, 0, &b, AFRC); /* fracs are abs val */\r | |
463 | if (a.fhi >= 2 * b.fhi) { /* will divide work? */\r | |
464 | SETF (F_AOV | F_DCK | F_FOV | F_T1);\r | |
465 | return FALSE;\r | |
466 | }\r | |
467 | if (savhi = a.fhi) { /* dvd = 0? quo = 0 */\r | |
468 | a.sign = a.sign ^ b.sign; /* result sign */\r | |
469 | a.exp = a.exp - b.exp + FP_BIAS + 1; /* result exponent */\r | |
470 | a.fhi = a.fhi / (b.fhi >> (FP_N_FHI + 1)); /* do divide */\r | |
471 | if (a.sign && (savhi != (a.fhi * (b.fhi >> (FP_N_FHI + 1)))))\r | |
472 | rem = TRUE; /* KL/KS hack */\r | |
473 | a.fhi = a.fhi << (FP_V_UNORM - FP_N_FHI - 1); /* put quo in place */\r | |
474 | if ((a.fhi & FP_UNORM) == 0) { /* normalize 1b */\r | |
475 | a.fhi = a.fhi << 1; /* before masking */\r | |
476 | a.exp = a.exp - 1;\r | |
477 | }\r | |
478 | a.fhi = a.fhi & (FP_UFHI | FP_URNDS); /* mask quo to 28b */\r | |
479 | }\r | |
480 | fnorm (&a, (rnd? FP_URNDS: 0)); /* normalize, round */\r | |
481 | *rs = fpack (&a, NULL, rem); /* pack result */\r | |
482 | return TRUE;\r | |
483 | }\r | |
484 | \r | |
485 | /* Single precision floating scale. */\r | |
486 | \r | |
487 | d10 fsc (d10 val, a10 ea)\r | |
488 | {\r | |
489 | int32 sc = LIT8 (ea);\r | |
490 | UFP a;\r | |
491 | \r | |
492 | if (val == 0) return 0;\r | |
493 | funpack (val, 0, &a, AFRC); /* unpack operand */\r | |
494 | if (ea & RSIGN) a.exp = a.exp - sc; /* adjust exponent */\r | |
495 | else a.exp = a.exp + sc;\r | |
496 | fnorm (&a, 0); /* renormalize */\r | |
497 | return fpack (&a, NULL, FALSE); /* pack result */\r | |
498 | }\r | |
499 | \r | |
500 | /* Float integer operand and round */\r | |
501 | \r | |
502 | d10 fltr (d10 mb)\r | |
503 | {\r | |
504 | UFP a;\r | |
505 | d10 val = ABS (mb);\r | |
506 | \r | |
507 | a.sign = GET_FPSIGN (mb); /* get sign */\r | |
508 | a.exp = FP_BIAS + 36; /* initial exponent */\r | |
509 | a.fhi = val << (FP_V_UNORM - 35); /* left justify op */\r | |
510 | a.flo = 0;\r | |
511 | fnorm (&a, FP_URNDS); /* normalize, round */\r | |
512 | return fpack (&a, NULL, FALSE); /* pack result */\r | |
513 | }\r | |
514 | \r | |
515 | /* Fix and truncate/round floating operand */\r | |
516 | \r | |
517 | void fix (int32 ac, d10 mb, t_bool rnd)\r | |
518 | {\r | |
519 | int32 sc;\r | |
520 | t_uint64 so;\r | |
521 | UFP a;\r | |
522 | \r | |
523 | funpack (mb, 0, &a, AFRC); /* unpack operand */\r | |
524 | if (a.exp > (FP_BIAS + FP_N_FHI + FP_N_EXP)) SETF (F_AOV | F_T1);\r | |
525 | else if (a.exp < FP_BIAS) AC(ac) = 0; /* < 1/2? */\r | |
526 | else {\r | |
527 | sc = FP_V_UNORM - (a.exp - FP_BIAS) + 1;\r | |
528 | AC(ac) = a.fhi >> sc;\r | |
529 | if (rnd) {\r | |
530 | so = a.fhi << (64 - sc);\r | |
531 | if (so >= (0x8000000000000000 + a.sign)) AC(ac) = AC(ac) + 1;\r | |
532 | }\r | |
533 | if (a.sign) AC(ac) = NEG (AC(ac));\r | |
534 | }\r | |
535 | return;\r | |
536 | }\r | |
537 | \r | |
538 | /* Double precision floating add/subtract\r | |
539 | Since a.flo is 0, adding b.flo is just a copy - this is incorporated into\r | |
540 | the denormalization step. If there's no denormalization, bflo is zero too.\r | |
541 | */\r | |
542 | \r | |
543 | void dfad (int32 ac, d10 *rs, int32 inv)\r | |
544 | {\r | |
545 | int32 p1 = ADDAC (ac, 1);\r | |
546 | int32 ediff;\r | |
547 | UFP a, b, t;\r | |
548 | \r | |
549 | if (inv) { DMOVN (rs); } /* subtract? -b */\r | |
550 | if ((AC(ac) | AC(p1)) == 0) funpack (rs[0], rs[1], &a, AFRC);\r | |
551 | /* a == 0? sum = b */\r | |
552 | else if ((rs[0] | rs[1]) == 0) funpack (AC(ac), AC(p1), &a, AFRC);\r | |
553 | /* b == 0? sum = a */\r | |
554 | else {\r | |
555 | funpack (AC(ac), AC(p1), &a, SFRC); /* unpack operands */\r | |
556 | funpack (rs[0], rs[1], &b, SFRC);\r | |
557 | ediff = a.exp - b.exp; /* get exp diff */\r | |
558 | if (ediff < 0) { /* a < b? switch */\r | |
559 | t = a;\r | |
560 | a = b;\r | |
561 | b = t;\r | |
562 | ediff = -ediff;\r | |
563 | }\r | |
564 | if (ediff > 127) ediff = 127; /* cap diff at 127 */\r | |
565 | if (ediff > 63) { /* diff > 63? */\r | |
566 | a.flo = (t_int64) b.fhi >> (ediff - 64); /* b hi to a lo */\r | |
567 | b.fhi = b.sign? FP_ONES: 0; /* hi = all sign */\r | |
568 | }\r | |
569 | else if (ediff) { /* diff <= 63 */\r | |
570 | a.flo = (b.flo >> ediff) | (b.fhi << (64 - ediff));\r | |
571 | b.fhi = (t_int64) b.fhi >> ediff; /* shift b (signed) */\r | |
572 | }\r | |
573 | a.fhi = a.fhi + b.fhi; /* do add */\r | |
574 | if (a.sign ^ b.sign) { /* add or subtract? */\r | |
575 | if (a.fhi & FP_UCRY) { /* subtract, frac -? */\r | |
576 | DUNEG (a); /* complement result */\r | |
577 | a.sign = 1; /* result is - */\r | |
578 | }\r | |
579 | else a.sign = 0; /* result is + */\r | |
580 | }\r | |
581 | else {\r | |
582 | if (a.sign) { DUNEG (a); }; /* add, src -? comp */\r | |
583 | if (a.fhi & FP_UCRY) { /* check for carry */\r | |
584 | a.fhi = a.fhi >> 1; /* flo won't be used */\r | |
585 | a.exp = a.exp + 1;\r | |
586 | }\r | |
587 | }\r | |
588 | }\r | |
589 | fnorm (&a, FP_URNDD); /* normalize, round */\r | |
590 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r | |
591 | return;\r | |
592 | }\r | |
593 | \r | |
594 | /* Double precision floating multiply\r | |
595 | The 62b fractions are multiplied, with cross products, to produce a\r | |
596 | 124b fraction with two leading and two trailing 0's. Because the\r | |
597 | product has 2 leading 0's, instead of the normal 1, an extra\r | |
598 | normalization step is needed. Accordingly, the exponent calculation\r | |
599 | increments the result exponent, to compensate for normalization.\r | |
600 | */\r | |
601 | \r | |
602 | void dfmp (int32 ac, d10 *rs)\r | |
603 | {\r | |
604 | int32 p1 = ADDAC (ac, 1);\r | |
605 | t_uint64 xh, xl, yh, yl, mid;\r | |
606 | UFP a, b;\r | |
607 | \r | |
608 | funpack (AC(ac), AC(p1), &a, AFRC); /* unpack operands */\r | |
609 | funpack (rs[0], rs[1], &b, AFRC);\r | |
610 | if ((a.fhi == 0) || (b.fhi == 0)) { /* either 0? result 0 */\r | |
611 | AC(ac) = AC(p1) = 0;\r | |
612 | return;\r | |
613 | }\r | |
614 | a.sign = a.sign ^ b.sign; /* result sign */\r | |
615 | a.exp = a.exp + b.exp - FP_BIAS + 1; /* result exponent */\r | |
616 | xh = a.fhi >> 32; /* split 62b fracs */\r | |
617 | xl = a.fhi & MSK32; /* into 32b halves */\r | |
618 | yh = b.fhi >> 32;\r | |
619 | yl = b.fhi & MSK32;\r | |
620 | a.fhi = xh * yh; /* hi xproduct */\r | |
621 | a.flo = xl * yl; /* low xproduct */\r | |
622 | mid = (xh * yl) + (yh * xl); /* fits in 64b */\r | |
623 | a.flo = a.flo + (mid << 32); /* add mid lo to lo */\r | |
624 | a.fhi = a.fhi + ((mid >> 32) & MSK32) + (a.flo < (mid << 32));\r | |
625 | fnorm (&a, FP_URNDD); /* normalize, round */\r | |
626 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r | |
627 | return;\r | |
628 | }\r | |
629 | \r | |
630 | /* Double precision floating divide\r | |
631 | This algorithm develops a full 62 bits of quotient, plus one rounding\r | |
632 | bit, in the low order 63b of a 64b number. To do this, we must assure\r | |
633 | that the initial divide step generates a 1. If it would fail, shift\r | |
634 | the dividend left and decrement the result exponent accordingly.\r | |
635 | */\r | |
636 | \r | |
637 | void dfdv (int32 ac, d10 *rs)\r | |
638 | {\r | |
639 | int32 p1 = ADDAC (ac, 1);\r | |
640 | int32 i;\r | |
641 | t_uint64 qu = 0;\r | |
642 | UFP a, b;\r | |
643 | \r | |
644 | funpack (AC(ac), AC(p1), &a, AFRC); /* unpack operands */\r | |
645 | funpack (rs[0], rs[1], &b, AFRC);\r | |
646 | if (a.fhi >= 2 * b.fhi) { /* will divide work? */\r | |
647 | SETF (F_AOV | F_DCK | F_FOV | F_T1);\r | |
648 | return;\r | |
649 | }\r | |
650 | if (a.fhi) { /* dvd = 0? quo = 0 */\r | |
651 | a.sign = a.sign ^ b.sign; /* result sign */\r | |
652 | a.exp = a.exp - b.exp + FP_BIAS + 1; /* result exponent */\r | |
653 | if (a.fhi < b.fhi) { /* make sure initial */\r | |
654 | a.fhi = a.fhi << 1; /* divide step will work */\r | |
655 | a.exp = a.exp - 1;\r | |
656 | }\r | |
657 | for (i = 0; i < 63; i++) { /* 63b of quotient */\r | |
658 | qu = qu << 1; /* shift quotient */\r | |
659 | if (a.fhi >= b.fhi) { /* will div work? */\r | |
660 | a.fhi = a.fhi - b.fhi; /* sub, quo = 1 */\r | |
661 | qu = qu + 1;\r | |
662 | }\r | |
663 | a.fhi = a.fhi << 1; /* shift dividend */\r | |
664 | }\r | |
665 | a.fhi = qu;\r | |
666 | }\r | |
667 | fnorm (&a, FP_URNDD); /* normalize, round */\r | |
668 | AC(ac) = fpack (&a, &AC(p1), FALSE); /* pack result */\r | |
669 | return;\r | |
670 | }\r | |
671 | \r | |
672 | /* Unpack floating point operand */\r | |
673 | \r | |
674 | void funpack (d10 h, d10 l, UFP *r, t_bool sgn)\r | |
675 | {\r | |
676 | d10 fphi, fplo;\r | |
677 | \r | |
678 | r->sign = GET_FPSIGN (h);\r | |
679 | r->exp = GET_FPEXP (h);\r | |
680 | fphi = GET_FPHI (h);\r | |
681 | fplo = GET_FPLO (l);\r | |
682 | r->fhi = (fphi << FP_V_UFHI) | (fplo << FP_V_UFLO);\r | |
683 | r->flo = 0;\r | |
684 | if (r->sign) {\r | |
685 | r->exp = r->exp ^ FP_M_EXP; /* 1s comp exp */\r | |
686 | if (sgn) { /* signed frac? */\r | |
687 | if (r->fhi) r->fhi = r->fhi | FP_UCRY; /* extend sign */ \r | |
688 | else {\r | |
689 | r->exp = r->exp + 1;\r | |
690 | r->fhi = FP_UCRY | FP_UNORM;\r | |
691 | }\r | |
692 | }\r | |
693 | else { /* abs frac */\r | |
694 | if (r->fhi) r->fhi = UNEG (r->fhi) & FP_UFRAC;\r | |
695 | else {\r | |
696 | r->exp = r->exp + 1;\r | |
697 | r->fhi = FP_UNORM;\r | |
698 | }\r | |
699 | }\r | |
700 | }\r | |
701 | return;\r | |
702 | }\r | |
703 | \r | |
704 | /* Normalize and optionally round floating point operand */\r | |
705 | \r | |
706 | void fnorm (UFP *a, t_int64 rnd)\r | |
707 | {\r | |
708 | int32 i;\r | |
709 | static t_uint64 normmask[6] = {\r | |
710 | 0x6000000000000000, 0x7800000000000000, 0x7F80000000000000,\r | |
711 | 0x7FFF800000000000, 0x7FFFFFFF80000000, 0x7FFFFFFFFFFFFFFF\r | |
712 | };\r | |
713 | static int32 normtab[7] = { 1, 2, 4, 8, 16, 32, 63 };\r | |
714 | extern a10 pager_PC;\r | |
715 | \r | |
716 | if (a->fhi & FP_UCRY) { /* carry set? */\r | |
717 | printf ("%%PDP-10 FP: carry bit set at normalization, PC = %o\n", pager_PC);\r | |
718 | a->flo = (a->flo >> 1) | ((a->fhi & 1) << 63); /* try to recover */\r | |
719 | a->fhi = a->fhi >> 1; /* but root cause */\r | |
720 | a->exp = a->exp + 1; /* should be fixed! */\r | |
721 | }\r | |
722 | if ((a->fhi | a->flo) == 0) { /* if fraction = 0 */\r | |
723 | a->sign = a->exp = 0; /* result is 0 */\r | |
724 | return;\r | |
725 | }\r | |
726 | while ((a->fhi & FP_UNORM) == 0) { /* normalized? */\r | |
727 | for (i = 0; i < 6; i++) {\r | |
728 | if (a->fhi & normmask[i]) break;\r | |
729 | }\r | |
730 | a->fhi = (a->fhi << normtab[i]) | (a->flo >> (64 - normtab[i]));\r | |
731 | a->flo = a->flo << normtab[i];\r | |
732 | a->exp = a->exp - normtab[i];\r | |
733 | }\r | |
734 | if (rnd) { /* rounding? */\r | |
735 | a->fhi = a->fhi + rnd; /* add round const */\r | |
736 | if (a->fhi & FP_UCRY) { /* if carry out, */\r | |
737 | a->fhi = a->fhi >> 1; /* renormalize */\r | |
738 | a->exp = a->exp + 1;\r | |
739 | }\r | |
740 | }\r | |
741 | return;\r | |
742 | }\r | |
743 | \r | |
744 | /* Pack floating point result */\r | |
745 | \r | |
746 | d10 fpack (UFP *r, d10 *lo, t_bool fdvneg)\r | |
747 | {\r | |
748 | d10 val[2];\r | |
749 | \r | |
750 | if (r->exp < 0) SETF (F_AOV | F_FOV | F_FXU | F_T1);\r | |
751 | else if (r->exp > FP_M_EXP) SETF (F_AOV | F_FOV | F_T1);\r | |
752 | val[0] = (((((d10) r->exp) & FP_M_EXP) << FP_V_EXP) |\r | |
753 | ((r->fhi & FP_UFHI) >> FP_V_UFHI)) & DMASK;\r | |
754 | if (lo) val[1] = ((r->fhi & FP_UFLO) >> FP_V_UFLO) & MMASK;\r | |
755 | else val[1] = 0;\r | |
756 | if (r->sign) { /* negate? */\r | |
757 | if (fdvneg) { /* fdvr special? */\r | |
758 | val[1] = ~val[1] & MMASK; /* 1's comp */\r | |
759 | val[0] = ~val[0] & DMASK;\r | |
760 | }\r | |
761 | else { DMOVN (val); } /* 2's comp */\r | |
762 | }\r | |
763 | if (lo) *lo = val[1];\r | |
764 | return val[0];\r | |
765 | }\r |