Annotation of src/sys/arch/pc532/fpu/ieee_subnormal.c, Revision 1.4
1.4 ! chs 1: /* $NetBSD: ieee_subnormal.c,v 1.3 2004/01/23 04:12:39 simonb Exp $ */
1.1 phil 2:
1.3 simonb 3: /*
1.1 phil 4: * IEEE floating point support for NS32081 and NS32381 fpus.
5: * Copyright (c) 1995 Ian Dall
6: * All Rights Reserved.
1.3 simonb 7: *
1.1 phil 8: * Permission to use, copy, modify and distribute this software and its
9: * documentation is hereby granted, provided that both the copyright
10: * notice and this permission notice appear in all copies of the
11: * software, derivative works or modified versions, and any portions
12: * thereof, and that both notices appear in supporting documentation.
1.3 simonb 13: *
1.1 phil 14: * IAN DALL ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
15: * IAN DALL DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
16: * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17: */
1.3 simonb 18: /*
1.1 phil 19: * File: ieee_subnormal.c
20: * Author: Ian Dall
21: * Date: November 1995
22: *
23: * Handle operations which generated underflow traps. Subnormal
24: * (denormalized numbers) are generated as required.
25: *
26: * HISTORY
27: * 14-Dec-95 Ian Dall (Ian.Dall@dsto.defence.gov.au)
28: * First release.
29: *
30: */
1.2 lukem 31:
32: #include <sys/cdefs.h>
1.4 ! chs 33: __KERNEL_RCSID(0, "$NetBSD: ieee_subnormal.c,v 1.3 2004/01/23 04:12:39 simonb Exp $");
1.2 lukem 34:
1.1 phil 35: #include "ieee_internal.h"
1.2 lukem 36:
1.1 phil 37: #include <machine/psl.h>
38:
39: /* Return bit pos numbered from lsb 0 to 31. Returns 31 if no bit is set */
40: static int find_msb(unsigned int t) {
41: static const int b_mask[] = {
42: 0xffff0000,
43: 0xff00ff00,
44: 0xf0f0f0f0,
45: 0xcccccccc,
46: 0xaaaaaaaa };
47: int i;
48: int pos = 31;
49: int bit_incr = 16; /* Half no of bits in int */
50: for (i = 0; i < 5; i++, bit_incr /= 2) {
51: if(t & b_mask[i]) {
52: t &= b_mask[i];
53: }
54: else {
55: pos -= bit_incr;
56: t &= ~b_mask[i];
57: }
58: }
59: return pos;
60: }
61:
62: static int leading_zeros(union t_conv *data)
63: {
64: unsigned int t;
65: if ((t = data->d_bits.mantissa)) {
66: return 19 - find_msb(t);
67: }
68: else if ((t = data->d_bits.mantissa2)) {
69: return 51 - find_msb(t);
70: }
71: else
72: return 52;
73: }
74:
75: static void lshift_mantissa(union t_conv *data, int n)
76: {
77: unsigned long t[2];
78: t[1] = data->d_bits.mantissa;
79: t[0] = data->d_bits.mantissa2;
80: *(unsigned long long *) t <<= n;
81: data->d_bits.mantissa = t[1];
82: data->d_bits.mantissa2 = t[0];
83: }
84:
85:
86: static void rshift_mantissa(union t_conv *data, int n)
87: {
88: unsigned long t[2];
89: t[1] = data->d_bits.mantissa | 0x100000;
90: t[0] = data->d_bits.mantissa2;
91: *(unsigned long long *) t >>= n;
92: data->d_bits.mantissa = t[1];
93: data->d_bits.mantissa2 = t[0];
94: }
95:
96: /* After this, the data is a normal double and the returned value is
97: * such that:
98: * union t_conv t;
99: * t = *data;
100: * norm = normalize(&t);
101: * 2**norm * t.d == data->d;
102: *
103: * Assume data is not already normalized.
104: */
105: int ieee_normalize(union t_conv *data)
106: {
107: int norm;
108: if(data->d_bits.exp != 0)
109: return 0;
110: norm = leading_zeros(data) + 1; /* plus one for the implied bit */
111: data->d_bits.exp = 1;
112: lshift_mantissa(data, norm);
113: return -norm;
114: }
115:
116: /* Divide by 2**n producing a denormalized no if necessary */
1.3 simonb 117: static void denormalize(union t_conv *data, int n)
1.1 phil 118: {
119: int exp = data->d_bits.exp;
120: if(exp > n)
121: exp -= n;
122: else if (exp <= n) {
123: rshift_mantissa(data, n - exp + 1); /* plus 1 for the implied bit */
124: exp = 0;
125: }
126: data->d_bits.exp = exp;
127: }
128:
129: static int scale_and_check(union t_conv *d, int scale)
130: {
131: int exp;
132: exp = d->d_bits.exp - scale;
133: if(exp >= 0x7ff) {
134: /* Overflow */
135: d->d_bits.exp = 0x7ff;
136: d->d_bits.mantissa = 0;
137: d->d_bits.mantissa2 = 0; /* XXX sig */
138: return FPC_TT_OVFL;
139: }
140: if(exp <= 0) {
141: /* Underflow */
142: denormalize(d, scale); /* XXX sig */
143: return FPC_TT_UNDFL;
144: }
145: d->d_bits.exp = exp;
146: return FPC_TT_NONE;
147: }
148:
149:
150: /* Add two doubles, not caring if one or both is a de-norm.
151: * Strategy: First scale and normalize operands so the addition
152: * can't overflow or underflow, then do a normal floating point
153: * addition, then scale back and possibly denormalize.
154: */
155: int ieee_add(double data1, double *data2)
156: {
157: union t_conv d1 = (union t_conv) data1;
158: union t_conv *d2 = (union t_conv *) data2;
159: int scale;
160: int norm1 = ieee_normalize(&d1);
161: int norm2 = ieee_normalize(d2);
162: int exp1 = d1.d_bits.exp + norm1;
163: int exp2 = d2->d_bits.exp + norm2;
164: if(exp1 > exp2) {
165: scale = EXP_DBIAS - exp1;
166: exp1 = EXP_DBIAS;
167: exp2 += scale;
168: }
169: else {
170: scale = EXP_DBIAS - exp2;
171: exp2 = EXP_DBIAS;
172: exp1 += scale;
173: }
174: if(exp1 > 0) {
175: d1.d_bits.exp = exp1;
176: if (exp2 > 0) {
177: d2->d_bits.exp = exp2;
178: d2->d += d1.d;
179: }
180: else {
181: d2->d = d1.d;
182: }
183: }
184: else {
185: d2->d_bits.exp = exp2;
186: }
187: return scale_and_check(d2, scale);
188: }
189:
190: /* Multiply two doubles, not caring if one or both is a de-norm.
191: * Strategy: First scale and normalize operands so the multiplication
192: * can't overflow or underflow, then do a normal floating point
193: * addition, then scale back and possibly denormalize.
194: */
195: int ieee_mul(double data1, double *data2)
196: {
197: union t_conv d1 = (union t_conv) data1;
198: union t_conv *d2 = (union t_conv *) data2;
199: int scale;
200: int norm1 = ieee_normalize(&d1);
201: int norm2 = ieee_normalize(d2);
202: int exp1 = d1.d_bits.exp + norm1;
203: int exp2 = d2->d_bits.exp + norm2;
204: d1.d_bits.exp = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */
205: d2->d_bits.exp = EXP_DBIAS;
206: d2->d *= d1.d;
207: scale = EXP_DBIAS - exp1 + EXP_DBIAS - exp2;
208: return scale_and_check(d2, scale);
209: }
210:
211: /* Divide d2 by d1, not caring if one or both is a de-norm.
212: * Strategy: First scale and normalize operands so the division
213: * can't overflow or underflow, then do a normal floating point
214: * division, then scale back and possibly denormalize.
215: */
216: int ieee_div(double data1, double *data2)
217: {
218: union t_conv d1 = (union t_conv) data1;
219: union t_conv *d2 = (union t_conv *) data2;
220: int scale;
221: int norm1 = ieee_normalize(&d1);
222: int norm2 = ieee_normalize(d2);
223: int exp1 = d1.d_bits.exp + norm1;
224: int exp2 = d2->d_bits.exp + norm2;
225: d1.d_bits.exp = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */
226: d2->d_bits.exp = EXP_DBIAS;
227: d2->d /= d1.d;
228: scale = exp1 - exp2;
229: return scale_and_check(d2, scale);
230: }
231:
232: /* Add mul-add three doubles d1 * d2 + d3 -> d3, not caring if any a de-norm.
233: * Strategy: First scale and normalize operands so the operations
234: * can't overflow or underflow, then do a normal floating point operation
235: * addition, then scale back and possibly denormalize.
236: */
237: int ieee_dot(double data1, double data2, double *data3)
238: {
239: union t_conv d1 = (union t_conv) data1;
240: union t_conv d2 = (union t_conv) data2;
241: union t_conv *d3 = (union t_conv *) data3;
242: int scale;
243: int norm1 = ieee_normalize(&d1);
244: int norm2 = ieee_normalize(&d2);
245: int norm3 = ieee_normalize(d3);
246: int exp1 = d1.d_bits.exp + norm1;
247: int exp2 = d2.d_bits.exp + norm2;
248: int exp3 = d3->d_bits.exp + norm3;
249: int exp_prod = exp1 + exp2;
250: if(exp_prod > exp3) {
251: scale = EXP_DBIAS + EXP_DBIAS - exp_prod;
252: exp1 = EXP_DBIAS; /* Add EXP_DBIAS - exp1 */
253: exp2 = EXP_DBIAS;
254: exp3 += scale;
255: }
256: else {
257: scale = EXP_DBIAS - exp3;
258: exp3 = EXP_DBIAS;
259: exp1 = (exp_prod + scale)/2;
260: exp2 = exp_prod + scale - exp1;
261: }
262:
263: if(exp1 > 0 && exp2 > 0) {
264: d1.d_bits.exp = exp1;
265: d2.d_bits.exp = exp2;
266: if(exp3 > 0) {
267: d3->d_bits.exp = exp3;
268: d3->d += d1.d * d2.d;
269: }
270: else {
271: d3->d = d1.d * d2.d;
272: }
273: }
274: else {
275: d3->d_bits.exp = exp3;
276: }
277: return scale_and_check(d3, scale);
278: }
279:
280: /* Compare the magnitude of two ops.
281: * return: 1 |op1| > |op2|
282: * -1 |op1| < |op2|
283: * 0 |op1| == |op2|
284: */
285: static int u_cmp(double data1, double data2)
286: {
287: union t_conv d1 = (union t_conv) data1;
288: union t_conv d2 = (union t_conv) data2;
289: int exp1 = d1.d_bits.exp;
290: int exp2 = d2.d_bits.exp;
291: if (exp1 > exp2)
292: return 1;
293: else if (exp1 < exp2)
294: return -1;
295: else if (d1.d_bits.mantissa > d2.d_bits.mantissa)
296: return 1;
297: else if (d1.d_bits.mantissa < d2.d_bits.mantissa)
298: return -1;
299: else if (d1.d_bits.mantissa2 > d2.d_bits.mantissa2)
300: return 1;
301: else if (d1.d_bits.mantissa2 < d2.d_bits.mantissa2)
302: return -1;
303: else
304: return 0;
305: }
306:
1.4 ! chs 307: void ieee_cmp(double data1, double data2, state *mystate)
1.1 phil 308: {
309: union t_conv d1 = (union t_conv) data1;
310: union t_conv d2 = (union t_conv) data2;
311: int sign1 = d1.d_bits.sign;
312: int sign2 = d2.d_bits.sign;
1.4 ! chs 313: mystate->PSR &= ~(PSR_N | PSR_Z | PSR_L);
1.1 phil 314: switch(sign2 * 2 + sign1) {
315: case 2: /* op2 is negative op1 is positive */
1.4 ! chs 316: mystate->PSR |= PSR_N;
1.1 phil 317: break;
318: case 1: /* op2 is positive op1 is negative */
319: break;
320: case 0: /* Both ops same sign */
321: case 3:
322: {
323: int cmp = u_cmp(d1.d, d2.d);
324: if(sign1)
325: cmp *= -1;
326: if(cmp > 0)
1.4 ! chs 327: mystate->PSR |= PSR_N;
1.1 phil 328: else if (cmp == 0)
1.4 ! chs 329: mystate->PSR |= PSR_Z;
1.1 phil 330: }
331: break;
332: }
333: return;
334: }
335:
336:
337: int ieee_scalb(double data1, double *data2)
338: {
339: union t_conv d1 = (union t_conv) data1;
340: union t_conv *d2 = (union t_conv *) data2;
341: int exp2 = d2->d_bits.exp - EXP_DBIAS;
342: int n;
343: if (exp2 > 16) {
344: *d2 = infty;
345: d2->d_bits.sign = d1.d_bits.sign;
346: return FPC_TT_OVFL;
347: }
348: else if (exp2 < -16) {
349: d2->d = 0.0;
350: d2->d_bits.sign = d1.d_bits.sign;
351: return FPC_TT_OVFL;
352: }
353: n = d2->d;
354: *d2 = d1;
355: return scale_and_check(d2, n);
356: }
357:
358: /* With no trap, hardware produces zero, which is fast but not
359: * strictly correct. We should always have the hardware trap bit set
360: * and generate denormalized numbers by simulation unless the user
361: * indicates via the FPC_UNDE flag they want to handle it. */
362:
363: int ieee_undfl(struct operand *op1, struct operand *op2,
1.4 ! chs 364: struct operand *f0_op, int xopcode, state *mystate)
1.1 phil 365: {
1.4 ! chs 366: unsigned int fsr = mystate->FSR;
1.1 phil 367: int user_trap = FPC_TT_NONE;
368: DP(1, "Underflow trap: xopcode = 0x%x\n", xopcode);
369: if (fsr & FPC_UNDE) {
370: user_trap = FPC_TT_UNDFL;
371: }
372: else {
373: user_trap = FPC_TT_NONE;
374: /* Calculate correct denormal output. The easiest way is to
375: * prescale the operands so they won't underflow, then use the
376: * hardware operation, then post scale.
377: */
378:
379: /* The exact sematics are a bit tricky. Apparently, we should only
380: * set flag if we underflowed *and* there was loss of precision
381: * For now, just set the flag always XXX
382: */
383: fsr |= FPC_UF;
384:
385: switch(xopcode) {
386: case NEGF:
387: op1->data.d_bits.sign ^= 1;
388: /* Fall through */
389: case MOVF:
390: case MOVLF:
391: case MOVFL:
392: op2->data = op1->data;
393: break;
394: case CMPF:
1.4 ! chs 395: ieee_cmp(op1->data.d, op2->data.d, mystate);
1.1 phil 396: break;
397: case SUBF:
398: op1->data.d_bits.sign ^= 1;
399: /* Fall through */
400: case ADDF:
401: user_trap = ieee_add(op1->data.d, &op2->data.d);
402: break;
403: case MULF:
404: user_trap = ieee_mul(op1->data.d, &op2->data.d);
405: break;
406: case DIVF:
407: user_trap = ieee_div(op1->data.d, &op2->data.d);
408: break;
409: case ROUNDFI:
410: case TRUNCFI:
411: case FLOORFI:
412: op2->data.i = 0;
413: break;
414: case SCALBF:
415: user_trap = ieee_scalb(op1->data.d, &op2->data.d);
416: break;
417: case LOGBF:
418: op2->data.d = 0.0;
419: break;
420: case DOTF:
421: user_trap = ieee_dot(op1->data.d, op2->data.d, &f0_op->data.d);
422: break;
423: case POLYF:
424: {
425: union t_conv t = op2->data;
426: user_trap = ieee_dot(f0_op->data.d, op1->data.d, &t.d);
427: f0_op->data = t;
428: }
429: break;
430: }
431: }
1.4 ! chs 432: mystate->FSR = fsr;
1.1 phil 433: return user_trap;
434: }
CVSweb <webmaster@jp.NetBSD.org>