[BACK]Return to ieee_subnormal.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / arch / pc532 / fpu

Annotation of src/sys/arch/pc532/fpu/ieee_subnormal.c, Revision 1.4

1.4     ! chs         1: /*     $NetBSD: ieee_subnormal.c,v 1.3 2004/01/23 04:12:39 simonb Exp $        */
1.1       phil        2:
1.3       simonb      3: /*
1.1       phil        4:  * IEEE floating point support for NS32081 and NS32381 fpus.
                      5:  * Copyright (c) 1995 Ian Dall
                      6:  * All Rights Reserved.
1.3       simonb      7:  *
1.1       phil        8:  * Permission to use, copy, modify and distribute this software and its
                      9:  * documentation is hereby granted, provided that both the copyright
                     10:  * notice and this permission notice appear in all copies of the
                     11:  * software, derivative works or modified versions, and any portions
                     12:  * thereof, and that both notices appear in supporting documentation.
1.3       simonb     13:  *
1.1       phil       14:  * IAN DALL ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.
                     15:  * IAN DALL DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
                     16:  * WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     17:  */
1.3       simonb     18: /*
1.1       phil       19:  *     File:   ieee_subnormal.c
                     20:  *     Author: Ian Dall
                     21:  *     Date:   November 1995
                     22:  *
                     23:  *     Handle operations which generated underflow traps. Subnormal
                     24:  *     (denormalized numbers) are generated as required.
                     25:  *
                     26:  * HISTORY
                     27:  * 14-Dec-95  Ian Dall (Ian.Dall@dsto.defence.gov.au)
                     28:  *     First release.
                     29:  *
                     30:  */
1.2       lukem      31:
                     32: #include <sys/cdefs.h>
1.4     ! chs        33: __KERNEL_RCSID(0, "$NetBSD: ieee_subnormal.c,v 1.3 2004/01/23 04:12:39 simonb Exp $");
1.2       lukem      34:
1.1       phil       35: #include "ieee_internal.h"
1.2       lukem      36:
1.1       phil       37: #include <machine/psl.h>
                     38:
                     39: /* Return bit pos numbered from lsb 0 to 31. Returns 31 if no bit is set */
                     40: static int find_msb(unsigned int t) {
                     41:   static const int b_mask[] = {
                     42:     0xffff0000,
                     43:     0xff00ff00,
                     44:     0xf0f0f0f0,
                     45:     0xcccccccc,
                     46:     0xaaaaaaaa };
                     47:   int i;
                     48:   int pos = 31;
                     49:   int bit_incr = 16;           /* Half no of bits in int */
                     50:   for (i = 0; i < 5; i++, bit_incr /= 2) {
                     51:     if(t & b_mask[i]) {
                     52:       t &= b_mask[i];
                     53:     }
                     54:     else {
                     55:       pos -= bit_incr;
                     56:       t &= ~b_mask[i];
                     57:     }
                     58:   }
                     59:   return pos;
                     60: }
                     61:
                     62: static int leading_zeros(union t_conv *data)
                     63: {
                     64:   unsigned int t;
                     65:   if ((t = data->d_bits.mantissa)) {
                     66:     return 19 - find_msb(t);
                     67:   }
                     68:   else if ((t = data->d_bits.mantissa2)) {
                     69:     return 51 - find_msb(t);
                     70:   }
                     71:   else
                     72:     return 52;
                     73: }
                     74:
                     75: static void lshift_mantissa(union t_conv *data, int n)
                     76: {
                     77:   unsigned long t[2];
                     78:   t[1] = data->d_bits.mantissa;
                     79:   t[0] = data->d_bits.mantissa2;
                     80:   *(unsigned long long *) t <<= n;
                     81:   data->d_bits.mantissa = t[1];
                     82:   data->d_bits.mantissa2 = t[0];
                     83: }
                     84:
                     85:
                     86: static void rshift_mantissa(union t_conv *data, int n)
                     87: {
                     88:   unsigned long t[2];
                     89:   t[1] = data->d_bits.mantissa | 0x100000;
                     90:   t[0] = data->d_bits.mantissa2;
                     91:   *(unsigned long long *) t >>= n;
                     92:   data->d_bits.mantissa = t[1];
                     93:   data->d_bits.mantissa2 = t[0];
                     94: }
                     95:
                     96: /* After this, the data is a normal double and the returned value is
                     97:  * such that:
                     98:  *   union t_conv t;
                     99:  *   t = *data;
                    100:  *   norm = normalize(&t);
                    101:  *   2**norm * t.d == data->d;
                    102:  *
                    103:  * Assume data is not already normalized.
                    104:  */
                    105: int ieee_normalize(union t_conv *data)
                    106: {
                    107:   int norm;
                    108:   if(data->d_bits.exp != 0)
                    109:     return 0;
                    110:   norm = leading_zeros(data) + 1; /* plus one for the implied bit */
                    111:   data->d_bits.exp = 1;
                    112:   lshift_mantissa(data, norm);
                    113:   return -norm;
                    114: }
                    115:
                    116: /* Divide by 2**n producing a denormalized no if necessary */
1.3       simonb    117: static void denormalize(union t_conv *data, int n)
1.1       phil      118: {
                    119:   int exp = data->d_bits.exp;
                    120:   if(exp > n)
                    121:     exp -= n;
                    122:   else if (exp <= n) {
                    123:     rshift_mantissa(data, n - exp + 1);        /* plus 1 for the implied bit */
                    124:     exp = 0;
                    125:   }
                    126:   data->d_bits.exp = exp;
                    127: }
                    128:
                    129: static int scale_and_check(union t_conv *d, int scale)
                    130: {
                    131:   int exp;
                    132:   exp = d->d_bits.exp - scale;
                    133:   if(exp >= 0x7ff) {
                    134:     /* Overflow */
                    135:     d->d_bits.exp = 0x7ff;
                    136:     d->d_bits.mantissa = 0;
                    137:     d->d_bits.mantissa2 = 0;           /* XXX sig */
                    138:     return FPC_TT_OVFL;
                    139:   }
                    140:   if(exp <= 0) {
                    141:     /* Underflow */
                    142:     denormalize(d, scale);     /* XXX sig */
                    143:     return FPC_TT_UNDFL;
                    144:   }
                    145:   d->d_bits.exp = exp;
                    146:   return FPC_TT_NONE;
                    147: }
                    148:
                    149:
                    150: /* Add two doubles, not caring if one or both is a de-norm.
                    151:  * Strategy: First scale and normalize operands so the addition
                    152:  * can't overflow or underflow, then do a normal floating point
                    153:  * addition, then scale back and possibly denormalize.
                    154:  */
                    155: int ieee_add(double data1, double *data2)
                    156: {
                    157:   union t_conv d1 = (union t_conv) data1;
                    158:   union t_conv *d2 = (union t_conv *) data2;
                    159:   int scale;
                    160:   int norm1 = ieee_normalize(&d1);
                    161:   int norm2 = ieee_normalize(d2);
                    162:   int exp1 = d1.d_bits.exp + norm1;
                    163:   int exp2 = d2->d_bits.exp + norm2;
                    164:   if(exp1 > exp2) {
                    165:     scale = EXP_DBIAS - exp1;
                    166:     exp1 = EXP_DBIAS;
                    167:     exp2 += scale;
                    168:   }
                    169:   else {
                    170:     scale = EXP_DBIAS - exp2;
                    171:     exp2 = EXP_DBIAS;
                    172:     exp1 += scale;
                    173:   }
                    174:   if(exp1 > 0) {
                    175:     d1.d_bits.exp = exp1;
                    176:     if (exp2 > 0) {
                    177:       d2->d_bits.exp = exp2;
                    178:       d2->d += d1.d;
                    179:     }
                    180:     else {
                    181:       d2->d = d1.d;
                    182:     }
                    183:   }
                    184:   else {
                    185:     d2->d_bits.exp = exp2;
                    186:   }
                    187:   return scale_and_check(d2, scale);
                    188: }
                    189:
                    190: /* Multiply two doubles, not caring if one or both is a de-norm.
                    191:  * Strategy: First scale and normalize operands so the multiplication
                    192:  * can't overflow or underflow, then do a normal floating point
                    193:  * addition, then scale back and possibly denormalize.
                    194:  */
                    195: int ieee_mul(double data1, double *data2)
                    196: {
                    197:   union t_conv d1 = (union t_conv) data1;
                    198:   union t_conv *d2 = (union t_conv *) data2;
                    199:   int scale;
                    200:   int norm1 = ieee_normalize(&d1);
                    201:   int norm2 = ieee_normalize(d2);
                    202:   int exp1 = d1.d_bits.exp + norm1;
                    203:   int exp2 = d2->d_bits.exp + norm2;
                    204:   d1.d_bits.exp = EXP_DBIAS;   /* Add EXP_DBIAS - exp1 */
                    205:   d2->d_bits.exp = EXP_DBIAS;
                    206:   d2->d *= d1.d;
                    207:   scale = EXP_DBIAS - exp1 + EXP_DBIAS - exp2;
                    208:   return scale_and_check(d2, scale);
                    209: }
                    210:
                    211: /* Divide d2 by d1, not caring if one or both is a de-norm.
                    212:  * Strategy: First scale and normalize operands so the division
                    213:  * can't overflow or underflow, then do a normal floating point
                    214:  * division, then scale back and possibly denormalize.
                    215:  */
                    216: int ieee_div(double data1, double *data2)
                    217: {
                    218:   union t_conv d1 = (union t_conv) data1;
                    219:   union t_conv *d2 = (union t_conv *) data2;
                    220:   int scale;
                    221:   int norm1 = ieee_normalize(&d1);
                    222:   int norm2 = ieee_normalize(d2);
                    223:   int exp1 = d1.d_bits.exp + norm1;
                    224:   int exp2 = d2->d_bits.exp + norm2;
                    225:   d1.d_bits.exp = EXP_DBIAS;   /* Add EXP_DBIAS - exp1 */
                    226:   d2->d_bits.exp = EXP_DBIAS;
                    227:   d2->d /= d1.d;
                    228:   scale = exp1 - exp2;
                    229:   return scale_and_check(d2, scale);
                    230: }
                    231:
                    232: /* Add mul-add three doubles d1 * d2 + d3 -> d3, not caring if any a de-norm.
                    233:  * Strategy: First scale and normalize operands so the operations
                    234:  * can't overflow or underflow, then do a normal floating point operation
                    235:  * addition, then scale back and possibly denormalize.
                    236:  */
                    237: int ieee_dot(double data1, double data2, double *data3)
                    238: {
                    239:   union t_conv d1 = (union t_conv) data1;
                    240:   union t_conv d2 = (union t_conv) data2;
                    241:   union t_conv *d3 = (union t_conv *) data3;
                    242:   int scale;
                    243:   int norm1 = ieee_normalize(&d1);
                    244:   int norm2 = ieee_normalize(&d2);
                    245:   int norm3 = ieee_normalize(d3);
                    246:   int exp1 = d1.d_bits.exp + norm1;
                    247:   int exp2 = d2.d_bits.exp + norm2;
                    248:   int exp3 = d3->d_bits.exp + norm3;
                    249:   int exp_prod = exp1 + exp2;
                    250:   if(exp_prod > exp3) {
                    251:     scale = EXP_DBIAS + EXP_DBIAS - exp_prod;
                    252:     exp1 = EXP_DBIAS;          /* Add EXP_DBIAS - exp1 */
                    253:     exp2 = EXP_DBIAS;
                    254:     exp3 += scale;
                    255:   }
                    256:   else {
                    257:     scale = EXP_DBIAS - exp3;
                    258:     exp3 = EXP_DBIAS;
                    259:     exp1 = (exp_prod + scale)/2;
                    260:     exp2 = exp_prod + scale - exp1;
                    261:   }
                    262:
                    263:   if(exp1 > 0 && exp2 > 0) {
                    264:     d1.d_bits.exp = exp1;
                    265:     d2.d_bits.exp = exp2;
                    266:     if(exp3 > 0) {
                    267:       d3->d_bits.exp = exp3;
                    268:       d3->d += d1.d * d2.d;
                    269:     }
                    270:     else {
                    271:       d3->d = d1.d * d2.d;
                    272:     }
                    273:   }
                    274:   else {
                    275:     d3->d_bits.exp = exp3;
                    276:   }
                    277:   return scale_and_check(d3, scale);
                    278: }
                    279:
                    280: /* Compare the magnitude of two ops.
                    281:  * return:     1  |op1| > |op2|
                    282:  *            -1  |op1| < |op2|
                    283:  *             0  |op1| == |op2|
                    284:  */
                    285: static int u_cmp(double data1, double data2)
                    286: {
                    287:   union t_conv d1 = (union t_conv) data1;
                    288:   union t_conv d2 = (union t_conv) data2;
                    289:   int exp1 = d1.d_bits.exp;
                    290:   int exp2 = d2.d_bits.exp;
                    291:   if (exp1 > exp2)
                    292:     return 1;
                    293:   else if (exp1 < exp2)
                    294:     return -1;
                    295:   else if (d1.d_bits.mantissa > d2.d_bits.mantissa)
                    296:     return 1;
                    297:   else if (d1.d_bits.mantissa < d2.d_bits.mantissa)
                    298:     return -1;
                    299:   else if (d1.d_bits.mantissa2 > d2.d_bits.mantissa2)
                    300:     return 1;
                    301:   else if (d1.d_bits.mantissa2 < d2.d_bits.mantissa2)
                    302:     return -1;
                    303:   else
                    304:     return 0;
                    305: }
                    306:
1.4     ! chs       307: void ieee_cmp(double data1, double data2, state *mystate)
1.1       phil      308: {
                    309:   union t_conv d1 = (union t_conv) data1;
                    310:   union t_conv d2 = (union t_conv) data2;
                    311:   int sign1 = d1.d_bits.sign;
                    312:   int sign2 = d2.d_bits.sign;
1.4     ! chs       313:   mystate->PSR &= ~(PSR_N | PSR_Z | PSR_L);
1.1       phil      314:   switch(sign2 * 2 + sign1) {
                    315:   case 2:                      /* op2 is negative op1 is positive */
1.4     ! chs       316:     mystate->PSR |= PSR_N;
1.1       phil      317:     break;
                    318:   case 1:                      /* op2 is positive op1 is negative */
                    319:     break;
                    320:   case 0:                      /* Both ops same sign */
                    321:   case 3:
                    322:     {
                    323:       int cmp = u_cmp(d1.d, d2.d);
                    324:       if(sign1)
                    325:        cmp *= -1;
                    326:       if(cmp > 0)
1.4     ! chs       327:        mystate->PSR |= PSR_N;
1.1       phil      328:       else if (cmp == 0)
1.4     ! chs       329:        mystate->PSR |= PSR_Z;
1.1       phil      330:     }
                    331:     break;
                    332:   }
                    333:   return;
                    334: }
                    335:
                    336:
                    337: int ieee_scalb(double data1, double *data2)
                    338: {
                    339:   union t_conv d1 = (union t_conv) data1;
                    340:   union t_conv *d2 = (union t_conv *) data2;
                    341:   int exp2 = d2->d_bits.exp - EXP_DBIAS;
                    342:   int n;
                    343:   if (exp2 > 16) {
                    344:     *d2 = infty;
                    345:     d2->d_bits.sign = d1.d_bits.sign;
                    346:     return FPC_TT_OVFL;
                    347:   }
                    348:   else if (exp2 < -16) {
                    349:     d2->d = 0.0;
                    350:     d2->d_bits.sign = d1.d_bits.sign;
                    351:     return FPC_TT_OVFL;
                    352:   }
                    353:   n = d2->d;
                    354:   *d2 = d1;
                    355:   return scale_and_check(d2, n);
                    356: }
                    357:
                    358: /* With no trap, hardware produces zero, which is fast but not
                    359:  * strictly correct. We should always have the hardware trap bit set
                    360:  * and generate denormalized numbers by simulation unless the user
                    361:  * indicates via the FPC_UNDE flag they want to handle it.  */
                    362:
                    363: int ieee_undfl(struct operand *op1, struct operand *op2,
1.4     ! chs       364:               struct operand *f0_op, int xopcode, state *mystate)
1.1       phil      365: {
1.4     ! chs       366:   unsigned int fsr = mystate->FSR;
1.1       phil      367:   int user_trap = FPC_TT_NONE;
                    368:   DP(1, "Underflow trap: xopcode = 0x%x\n", xopcode);
                    369:   if (fsr & FPC_UNDE) {
                    370:     user_trap = FPC_TT_UNDFL;
                    371:   }
                    372:   else {
                    373:     user_trap = FPC_TT_NONE;
                    374:     /* Calculate correct denormal output. The easiest way is to
                    375:      * prescale the operands so they won't underflow, then use the
                    376:      * hardware operation, then post scale.
                    377:      */
                    378:
                    379:     /* The exact sematics are a bit tricky. Apparently, we should only
                    380:      * set flag if we underflowed *and* there was loss of precision
                    381:      * For now, just set the flag always  XXX
                    382:      */
                    383:     fsr |= FPC_UF;
                    384:
                    385:     switch(xopcode) {
                    386:     case NEGF:
                    387:       op1->data.d_bits.sign ^= 1;
                    388:       /* Fall through */
                    389:     case MOVF:
                    390:     case MOVLF:
                    391:     case MOVFL:
                    392:       op2->data = op1->data;
                    393:       break;
                    394:     case CMPF:
1.4     ! chs       395:       ieee_cmp(op1->data.d, op2->data.d, mystate);
1.1       phil      396:       break;
                    397:     case SUBF:
                    398:       op1->data.d_bits.sign ^= 1;
                    399:       /* Fall through */
                    400:     case ADDF:
                    401:       user_trap = ieee_add(op1->data.d, &op2->data.d);
                    402:       break;
                    403:     case MULF:
                    404:       user_trap = ieee_mul(op1->data.d, &op2->data.d);
                    405:       break;
                    406:     case DIVF:
                    407:       user_trap = ieee_div(op1->data.d, &op2->data.d);
                    408:       break;
                    409:     case ROUNDFI:
                    410:     case TRUNCFI:
                    411:     case FLOORFI:
                    412:       op2->data.i = 0;
                    413:       break;
                    414:     case SCALBF:
                    415:       user_trap = ieee_scalb(op1->data.d, &op2->data.d);
                    416:       break;
                    417:     case LOGBF:
                    418:       op2->data.d = 0.0;
                    419:       break;
                    420:     case DOTF:
                    421:       user_trap = ieee_dot(op1->data.d, op2->data.d, &f0_op->data.d);
                    422:       break;
                    423:     case POLYF:
                    424:       {
                    425:        union t_conv t = op2->data;
                    426:        user_trap = ieee_dot(f0_op->data.d, op1->data.d, &t.d);
                    427:        f0_op->data = t;
                    428:       }
                    429:       break;
                    430:     }
                    431:   }
1.4     ! chs       432:   mystate->FSR = fsr;
1.1       phil      433:   return user_trap;
                    434: }

CVSweb <webmaster@jp.NetBSD.org>