This source file includes the following definitions:
- roundAndPackInt32
- roundAndPackInt64
- extractFloat32Frac
- extractFloat32Exp
- extractFloat32Sign
- normalizeFloat32Subnormal
- packFloat32
- roundAndPackFloat32
- normalizeRoundAndPackFloat32
- extractFloat64Frac
- extractFloat64Exp
- extractFloat64Sign
- normalizeFloat64Subnormal
- packFloat64
- roundAndPackFloat64
- normalizeRoundAndPackFloat64
- extractFloatx80Frac
- extractFloatx80Exp
- extractFloatx80Sign
- normalizeFloatx80Subnormal
- packFloatx80
- roundAndPackFloatx80
- normalizeRoundAndPackFloatx80
- extractFloat128Frac1
- extractFloat128Frac0
- extractFloat128Exp
- extractFloat128Sign
- normalizeFloat128Subnormal
- packFloat128
- roundAndPackFloat128
- normalizeRoundAndPackFloat128
- int32_to_float32
- int32_to_float64
- int32_to_floatx80
- int32_to_float128
- int64_to_float32
- int64_to_float64
- int64_to_floatx80
- int64_to_float128
- float32_to_int32
- float32_to_int32_round_to_zero
- float32_to_int64
- float32_to_int64_round_to_zero
- float32_to_float64
- float32_to_floatx80
- float32_to_float128
- float32_round_to_int
- addFloat32Sigs
- subFloat32Sigs
- float32_add
- float32_sub
- float32_mul
- float32_div
- float32_rem
- float32_sqrt
- float32_eq
- float32_le
- float32_lt
- float32_eq_signaling
- float32_le_quiet
- float32_lt_quiet
- float64_to_int32
- float64_to_int32_round_to_zero
- float64_to_int64
- float64_to_int64_round_to_zero
- float64_to_float32
- float64_to_floatx80
- float64_to_float128
- float64_round_to_int
- addFloat64Sigs
- subFloat64Sigs
- float64_add
- float64_sub
- float64_mul
- float64_div
- float64_rem
- float64_sqrt
- float64_eq
- float64_le
- float64_lt
- float64_eq_signaling
- float64_le_quiet
- float64_lt_quiet
- floatx80_to_int32
- floatx80_to_int32_round_to_zero
- floatx80_to_int64
- floatx80_to_int64_round_to_zero
- floatx80_to_float32
- floatx80_to_float64
- floatx80_to_float128
- floatx80_round_to_int
- addFloatx80Sigs
- subFloatx80Sigs
- floatx80_add
- floatx80_sub
- floatx80_mul
- floatx80_div
- floatx80_rem
- floatx80_sqrt
- floatx80_eq
- floatx80_le
- floatx80_lt
- floatx80_eq_signaling
- floatx80_le_quiet
- floatx80_lt_quiet
- float128_to_int32
- float128_to_int32_round_to_zero
- float128_to_int64
- float128_to_int64_round_to_zero
- float128_to_float32
- float128_to_float64
- float128_to_floatx80
- float128_round_to_int
- addFloat128Sigs
- subFloat128Sigs
- float128_add
- float128_sub
- float128_mul
- float128_div
- float128_rem
- float128_sqrt
- float128_eq
- float128_le
- float128_lt
- float128_eq_signaling
- float128_le_quiet
- float128_lt_quiet
- float64_to_uint32_round_to_zero
- float32_to_uint32_round_to_zero
52 #ifndef NO_IEEE
53
54 #include <sys/cdefs.h>
55 #if defined(LIBC_SCCS) && !defined(lint)
56 __RCSID("$NetBSD: softfloat.c,v 1.1 2001/04/26 03:10:47 ross Exp $");
57 #endif
58
59 #ifdef SOFTFLOAT_FOR_GCC
60 #include "softfloat-for-gcc.h"
61 #endif
62
63 #include "milieu.h"
64 #include "softfloat.h"
65
66
67
68
69
70 #ifndef FLOAT64_DEMANGLE
71 #define FLOAT64_DEMANGLE(a) (a)
72 #endif
73 #ifndef FLOAT64_MANGLE
74 #define FLOAT64_MANGLE(a) (a)
75 #endif
76
77
78
79
80
81
82
83
84
85
86
87
88
89 #ifdef FLOATX80
90 int8 floatx80_rounding_precision = 80;
91 #endif
92
93
94
95
96
97
98
99
100 #include "softfloat-macros.h"
101
102
103
104
105
106
107
108
109
110
111
112 #include "softfloat-specialize.h"
113
114 #ifndef SOFTFLOAT_FOR_GCC
115
116
117
118
119
120
121
122
123
124
125
126
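/*
 * roundAndPackInt32: round the 64-bit fixed-point value `absZ' (binary
 * point between bits 6 and 7) to a signed 32-bit integer using the current
 * rounding mode.  Overflow raises the invalid exception and returns the
 * largest-magnitude integer of the appropriate sign; otherwise any
 * discarded bits set the inexact flag.
 */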
127 static int32 roundAndPackInt32( flag zSign, bits64 absZ )
128 {
129 int8 roundingMode;
130 flag roundNearestEven;
131 int8 roundIncrement, roundBits;
132 int32 z;
133
134 roundingMode = float_rounding_mode();
135 roundNearestEven = ( roundingMode == float_round_nearest_even );
136 roundIncrement = 0x40;
137 if ( ! roundNearestEven ) {
138 if ( roundingMode == float_round_to_zero ) {
139 roundIncrement = 0;
140 }
141 else {
142 roundIncrement = 0x7F;
143 if ( zSign ) {
144 if ( roundingMode == float_round_up ) roundIncrement = 0;
145 }
146 else {
147 if ( roundingMode == float_round_down ) roundIncrement = 0;
148 }
149 }
150 }
151 roundBits = absZ & 0x7F;
152 absZ = ( absZ + roundIncrement )>>7;
153 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
154 z = absZ;
155 if ( zSign ) z = - z;
156 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
157 float_raise( float_flag_invalid );
158 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
159 }
160 if ( roundBits ) float_set_inexact();
161 return z;
162
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
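/*
 * roundAndPackInt64: like roundAndPackInt32, but the quantity to round is
 * the 128-bit value formed by `absZ0' (the integer part) and `absZ1' (the
 * fraction).  `absZ1' only drives the rounding decision and the inexact
 * flag; overflow saturates to the extreme 64-bit integer of the proper sign
 * with the invalid exception raised.
 */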
178 static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
179 {
180 int8 roundingMode;
181 flag roundNearestEven, increment;
182 int64 z;
183
184 roundingMode = float_rounding_mode();
185 roundNearestEven = ( roundingMode == float_round_nearest_even );
186 increment = ( (sbits64) absZ1 < 0 );
187 if ( ! roundNearestEven ) {
188 if ( roundingMode == float_round_to_zero ) {
189 increment = 0;
190 }
191 else {
192 if ( zSign ) {
193 increment = ( roundingMode == float_round_down ) && absZ1;
194 }
195 else {
196 increment = ( roundingMode == float_round_up ) && absZ1;
197 }
198 }
199 }
200 if ( increment ) {
201 ++absZ0;
202 if ( absZ0 == 0 ) goto overflow;
203 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
204 }
205 z = absZ0;
206 if ( zSign ) z = - z;
207 if ( z && ( ( z < 0 ) ^ zSign ) ) {
208 overflow:
209 float_raise( float_flag_invalid );
210 return
211 zSign ? (sbits64) LIT64( 0x8000000000000000 )
212 : LIT64( 0x7FFFFFFFFFFFFFFF );
213 }
214 if ( absZ1 ) float_set_inexact();
215 return z;
216
217 }
218 #endif
219
220
221
222
223
224
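/*
 * A float32 is stored as 1 sign bit, 8 exponent bits and 23 fraction bits;
 * for example 25.0f is 0x41C80000 (sign 0, biased exponent 0x83, fraction
 * 0x480000).  The three helpers below extract those fields from the raw
 * bit pattern.
 */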
225 INLINE bits32 extractFloat32Frac( float32 a )
226 {
227
228 return a & 0x007FFFFF;
229
230 }
231
232
233
234
235
236
237 INLINE int16 extractFloat32Exp( float32 a )
238 {
239
240 return ( a>>23 ) & 0xFF;
241
242 }
243
244
245
246
247
248
249 INLINE flag extractFloat32Sign( float32 a )
250 {
251
252 return a>>31;
253
254 }
255
256
257
258
259
260
261
262
263
264 static void
265 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
266 {
267 int8 shiftCount;
268
269 shiftCount = countLeadingZeros32( aSig ) - 8;
270 *zSigPtr = aSig<<shiftCount;
271 *zExpPtr = 1 - shiftCount;
272
273 }
274
275
276
277
278
279
280
281
282
283
284
285
286
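/*
 * packFloat32: assemble sign, biased exponent and fraction into a raw
 * float32.  The fields are added rather than OR'ed, so a significand that
 * has rounded up into bit 23 deliberately carries into the exponent field;
 * the rounding code relies on this behaviour.
 */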
287 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
288 {
289
290 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
291
292 }
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
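/*
 * roundAndPackFloat32: round and pack a sign, exponent and significand into
 * a single-precision float.  The significand `zSig' keeps 7 bits below the
 * rounding point, so round-to-nearest-even adds 0x40 and shifts right by 7,
 * clearing the low bit on ties; the directed modes simply use an increment
 * of 0 or 0x7F.  Exponent overflow yields infinity or the largest finite
 * value depending on the rounding direction, and a too-small exponent
 * causes the significand to be denormalized, with the underflow flag raised
 * when the result is both tiny and inexact.
 */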
317 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
318 {
319 int8 roundingMode;
320 flag roundNearestEven;
321 int8 roundIncrement, roundBits;
322 flag isTiny;
323
324 roundingMode = float_rounding_mode();
325 roundNearestEven = ( roundingMode == float_round_nearest_even );
326 roundIncrement = 0x40;
327 if ( ! roundNearestEven ) {
328 if ( roundingMode == float_round_to_zero ) {
329 roundIncrement = 0;
330 }
331 else {
332 roundIncrement = 0x7F;
333 if ( zSign ) {
334 if ( roundingMode == float_round_up ) roundIncrement = 0;
335 }
336 else {
337 if ( roundingMode == float_round_down ) roundIncrement = 0;
338 }
339 }
340 }
341 roundBits = zSig & 0x7F;
342 if ( 0xFD <= (bits16) zExp ) {
343 if ( ( 0xFD < zExp )
344 || ( ( zExp == 0xFD )
345 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
346 ) {
347 float_raise( float_flag_overflow | float_flag_inexact );
348 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
349 }
350 if ( zExp < 0 ) {
351 isTiny =
352 ( float_detect_tininess == float_tininess_before_rounding )
353 || ( zExp < -1 )
354 || ( zSig + roundIncrement < 0x80000000 );
355 shift32RightJamming( zSig, - zExp, &zSig );
356 zExp = 0;
357 roundBits = zSig & 0x7F;
358 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
359 }
360 }
361 if ( roundBits ) float_set_inexact();
362 zSig = ( zSig + roundIncrement )>>7;
363 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
364 if ( zSig == 0 ) zExp = 0;
365 return packFloat32( zSign, zExp, zSig );
366
367 }
368
369
370
371
372
373
374
375
376
377
378
379 static float32
380 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
381 {
382 int8 shiftCount;
383
384 shiftCount = countLeadingZeros32( zSig ) - 1;
385 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
386
387 }
388
389
390
391
392
393
394 INLINE bits64 extractFloat64Frac( float64 a )
395 {
396
397 return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
398
399 }
400
401
402
403
404
405
406 INLINE int16 extractFloat64Exp( float64 a )
407 {
408
409 return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
410
411 }
412
413
414
415
416
417
418 INLINE flag extractFloat64Sign( float64 a )
419 {
420
421 return FLOAT64_DEMANGLE(a)>>63;
422
423 }
424
425
426
427
428
429
430
431
432
433 static void
434 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
435 {
436 int8 shiftCount;
437
438 shiftCount = countLeadingZeros64( aSig ) - 11;
439 *zSigPtr = aSig<<shiftCount;
440 *zExpPtr = 1 - shiftCount;
441
442 }
443
444
445
446
447
448
449
450
451
452
453
454
455
456 INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
457 {
458
459 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
460 ( ( (bits64) zExp )<<52 ) + zSig );
461
462 }
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
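/*
 * roundAndPackFloat64: the double-precision analogue of
 * roundAndPackFloat32.  Here the significand keeps 10 bits below the
 * rounding point, so the nearest-even increment is 0x200 and the final
 * shift is by 10; overflow, underflow and inexact are handled exactly as in
 * the single-precision routine.
 */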
487 static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
488 {
489 int8 roundingMode;
490 flag roundNearestEven;
491 int16 roundIncrement, roundBits;
492 flag isTiny;
493
494 roundingMode = float_rounding_mode();
495 roundNearestEven = ( roundingMode == float_round_nearest_even );
496 roundIncrement = 0x200;
497 if ( ! roundNearestEven ) {
498 if ( roundingMode == float_round_to_zero ) {
499 roundIncrement = 0;
500 }
501 else {
502 roundIncrement = 0x3FF;
503 if ( zSign ) {
504 if ( roundingMode == float_round_up ) roundIncrement = 0;
505 }
506 else {
507 if ( roundingMode == float_round_down ) roundIncrement = 0;
508 }
509 }
510 }
511 roundBits = zSig & 0x3FF;
512 if ( 0x7FD <= (bits16) zExp ) {
513 if ( ( 0x7FD < zExp )
514 || ( ( zExp == 0x7FD )
515 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
516 ) {
517 float_raise( float_flag_overflow | float_flag_inexact );
518 return FLOAT64_MANGLE(
519 FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
520 ( roundIncrement == 0 ));
521 }
522 if ( zExp < 0 ) {
523 isTiny =
524 ( float_detect_tininess == float_tininess_before_rounding )
525 || ( zExp < -1 )
526 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
527 shift64RightJamming( zSig, - zExp, &zSig );
528 zExp = 0;
529 roundBits = zSig & 0x3FF;
530 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
531 }
532 }
533 if ( roundBits ) float_set_inexact();
534 zSig = ( zSig + roundIncrement )>>10;
535 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
536 if ( zSig == 0 ) zExp = 0;
537 return packFloat64( zSign, zExp, zSig );
538
539 }
540
541
542
543
544
545
546
547
548
549
550
551 static float64
552 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
553 {
554 int8 shiftCount;
555
556 shiftCount = countLeadingZeros64( zSig ) - 1;
557 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
558
559 }
560
561 #ifdef FLOATX80
562
563
564
565
566
567
568
569 INLINE bits64 extractFloatx80Frac( floatx80 a )
570 {
571
572 return a.low;
573
574 }
575
576
577
578
579
580
581
582 INLINE int32 extractFloatx80Exp( floatx80 a )
583 {
584
585 return a.high & 0x7FFF;
586
587 }
588
589
590
591
592
593
594
595 INLINE flag extractFloatx80Sign( floatx80 a )
596 {
597
598 return a.high>>15;
599
600 }
601
602
603
604
605
606
607
608
609
610 static void
611 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
612 {
613 int8 shiftCount;
614
615 shiftCount = countLeadingZeros64( aSig );
616 *zSigPtr = aSig<<shiftCount;
617 *zExpPtr = 1 - shiftCount;
618
619 }
620
621
622
623
624
625
626
627 INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
628 {
629 floatx80 z;
630
631 z.low = zSig;
632 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
633 return z;
634
635 }
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
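/*
 * roundAndPackFloatx80: round and pack an extended-double result.  The
 * extended format keeps its integer bit explicitly and supports a reduced
 * rounding precision, so the 32- and 64-bit precision modes round within
 * `zSig0' using `roundMask', while the full 80-bit path (`precision80')
 * treats the second significand word `zSig1' as the round and sticky bits.
 */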
662 static floatx80
663 roundAndPackFloatx80(
664 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
665 )
666 {
667 int8 roundingMode;
668 flag roundNearestEven, increment, isTiny;
669 int64 roundIncrement, roundMask, roundBits;
670
671 roundingMode = float_rounding_mode();
672 roundNearestEven = ( roundingMode == float_round_nearest_even );
673 if ( roundingPrecision == 80 ) goto precision80;
674 if ( roundingPrecision == 64 ) {
675 roundIncrement = LIT64( 0x0000000000000400 );
676 roundMask = LIT64( 0x00000000000007FF );
677 }
678 else if ( roundingPrecision == 32 ) {
679 roundIncrement = LIT64( 0x0000008000000000 );
680 roundMask = LIT64( 0x000000FFFFFFFFFF );
681 }
682 else {
683 goto precision80;
684 }
685 zSig0 |= ( zSig1 != 0 );
686 if ( ! roundNearestEven ) {
687 if ( roundingMode == float_round_to_zero ) {
688 roundIncrement = 0;
689 }
690 else {
691 roundIncrement = roundMask;
692 if ( zSign ) {
693 if ( roundingMode == float_round_up ) roundIncrement = 0;
694 }
695 else {
696 if ( roundingMode == float_round_down ) roundIncrement = 0;
697 }
698 }
699 }
700 roundBits = zSig0 & roundMask;
701 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
702 if ( ( 0x7FFE < zExp )
703 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
704 ) {
705 goto overflow;
706 }
707 if ( zExp <= 0 ) {
708 isTiny =
709 ( float_detect_tininess == float_tininess_before_rounding )
710 || ( zExp < 0 )
711 || ( zSig0 <= zSig0 + roundIncrement );
712 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
713 zExp = 0;
714 roundBits = zSig0 & roundMask;
715 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
716 if ( roundBits ) float_set_inexact();
717 zSig0 += roundIncrement;
718 if ( (sbits64) zSig0 < 0 ) zExp = 1;
719 roundIncrement = roundMask + 1;
720 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
721 roundMask |= roundIncrement;
722 }
723 zSig0 &= ~ roundMask;
724 return packFloatx80( zSign, zExp, zSig0 );
725 }
726 }
727 if ( roundBits ) float_set_inexact();
728 zSig0 += roundIncrement;
729 if ( zSig0 < roundIncrement ) {
730 ++zExp;
731 zSig0 = LIT64( 0x8000000000000000 );
732 }
733 roundIncrement = roundMask + 1;
734 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
735 roundMask |= roundIncrement;
736 }
737 zSig0 &= ~ roundMask;
738 if ( zSig0 == 0 ) zExp = 0;
739 return packFloatx80( zSign, zExp, zSig0 );
740 precision80:
741 increment = ( (sbits64) zSig1 < 0 );
742 if ( ! roundNearestEven ) {
743 if ( roundingMode == float_round_to_zero ) {
744 increment = 0;
745 }
746 else {
747 if ( zSign ) {
748 increment = ( roundingMode == float_round_down ) && zSig1;
749 }
750 else {
751 increment = ( roundingMode == float_round_up ) && zSig1;
752 }
753 }
754 }
755 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
756 if ( ( 0x7FFE < zExp )
757 || ( ( zExp == 0x7FFE )
758 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
759 && increment
760 )
761 ) {
762 roundMask = 0;
763 overflow:
764 float_raise( float_flag_overflow | float_flag_inexact );
765 if ( ( roundingMode == float_round_to_zero )
766 || ( zSign && ( roundingMode == float_round_up ) )
767 || ( ! zSign && ( roundingMode == float_round_down ) )
768 ) {
769 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
770 }
771 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
772 }
773 if ( zExp <= 0 ) {
774 isTiny =
775 ( float_detect_tininess == float_tininess_before_rounding )
776 || ( zExp < 0 )
777 || ! increment
778 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
779 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
780 zExp = 0;
781 if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
782 if ( zSig1 ) float_set_inexact();
783 if ( roundNearestEven ) {
784 increment = ( (sbits64) zSig1 < 0 );
785 }
786 else {
787 if ( zSign ) {
788 increment = ( roundingMode == float_round_down ) && zSig1;
789 }
790 else {
791 increment = ( roundingMode == float_round_up ) && zSig1;
792 }
793 }
794 if ( increment ) {
795 ++zSig0;
796 zSig0 &=
797 ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
798 if ( (sbits64) zSig0 < 0 ) zExp = 1;
799 }
800 return packFloatx80( zSign, zExp, zSig0 );
801 }
802 }
803 if ( zSig1 ) float_set_inexact();
804 if ( increment ) {
805 ++zSig0;
806 if ( zSig0 == 0 ) {
807 ++zExp;
808 zSig0 = LIT64( 0x8000000000000000 );
809 }
810 else {
811 zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
812 }
813 }
814 else {
815 if ( zSig0 == 0 ) zExp = 0;
816 }
817 return packFloatx80( zSign, zExp, zSig0 );
818
819 }
820
821
822
823
824
825
826
827
828
829
830
831 static floatx80
832 normalizeRoundAndPackFloatx80(
833 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
834 )
835 {
836 int8 shiftCount;
837
838 if ( zSig0 == 0 ) {
839 zSig0 = zSig1;
840 zSig1 = 0;
841 zExp -= 64;
842 }
843 shiftCount = countLeadingZeros64( zSig0 );
844 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
845 zExp -= shiftCount;
846 return
847 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
848
849 }
850
851 #endif
852
853 #ifdef FLOAT128
854
855
856
857
858
859
860
861 INLINE bits64 extractFloat128Frac1( float128 a )
862 {
863
864 return a.low;
865
866 }
867
868
869
870
871
872
873
874 INLINE bits64 extractFloat128Frac0( float128 a )
875 {
876
877 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
878
879 }
880
881
882
883
884
885
886
887 INLINE int32 extractFloat128Exp( float128 a )
888 {
889
890 return ( a.high>>48 ) & 0x7FFF;
891
892 }
893
894
895
896
897
898
899 INLINE flag extractFloat128Sign( float128 a )
900 {
901
902 return a.high>>63;
903
904 }
905
906
907
908
909
910
911
912
913
914
915
916
917 static void
918 normalizeFloat128Subnormal(
919 bits64 aSig0,
920 bits64 aSig1,
921 int32 *zExpPtr,
922 bits64 *zSig0Ptr,
923 bits64 *zSig1Ptr
924 )
925 {
926 int8 shiftCount;
927
928 if ( aSig0 == 0 ) {
929 shiftCount = countLeadingZeros64( aSig1 ) - 15;
930 if ( shiftCount < 0 ) {
931 *zSig0Ptr = aSig1>>( - shiftCount );
932 *zSig1Ptr = aSig1<<( shiftCount & 63 );
933 }
934 else {
935 *zSig0Ptr = aSig1<<shiftCount;
936 *zSig1Ptr = 0;
937 }
938 *zExpPtr = - shiftCount - 63;
939 }
940 else {
941 shiftCount = countLeadingZeros64( aSig0 ) - 15;
942 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
943 *zExpPtr = 1 - shiftCount;
944 }
945
946 }
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962 INLINE float128
963 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
964 {
965 float128 z;
966
967 z.low = zSig1;
968 z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
969 return z;
970
971 }
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
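/*
 * roundAndPackFloat128: round and pack a quadruple-precision result.  The
 * 113-bit significand spans `zSig0' and `zSig1', while `zSig2' holds the
 * bits shifted out below it, so `zSig2' plays the same round/sticky role
 * that the low-order bits do in the narrower formats.
 */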
995 static float128
996 roundAndPackFloat128(
997 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
998 {
999 int8 roundingMode;
1000 flag roundNearestEven, increment, isTiny;
1001
1002 roundingMode = float_rounding_mode();
1003 roundNearestEven = ( roundingMode == float_round_nearest_even );
1004 increment = ( (sbits64) zSig2 < 0 );
1005 if ( ! roundNearestEven ) {
1006 if ( roundingMode == float_round_to_zero ) {
1007 increment = 0;
1008 }
1009 else {
1010 if ( zSign ) {
1011 increment = ( roundingMode == float_round_down ) && zSig2;
1012 }
1013 else {
1014 increment = ( roundingMode == float_round_up ) && zSig2;
1015 }
1016 }
1017 }
1018 if ( 0x7FFD <= (bits32) zExp ) {
1019 if ( ( 0x7FFD < zExp )
1020 || ( ( zExp == 0x7FFD )
1021 && eq128(
1022 LIT64( 0x0001FFFFFFFFFFFF ),
1023 LIT64( 0xFFFFFFFFFFFFFFFF ),
1024 zSig0,
1025 zSig1
1026 )
1027 && increment
1028 )
1029 ) {
1030 float_raise( float_flag_overflow | float_flag_inexact );
1031 if ( ( roundingMode == float_round_to_zero )
1032 || ( zSign && ( roundingMode == float_round_up ) )
1033 || ( ! zSign && ( roundingMode == float_round_down ) )
1034 ) {
1035 return
1036 packFloat128(
1037 zSign,
1038 0x7FFE,
1039 LIT64( 0x0000FFFFFFFFFFFF ),
1040 LIT64( 0xFFFFFFFFFFFFFFFF )
1041 );
1042 }
1043 return packFloat128( zSign, 0x7FFF, 0, 0 );
1044 }
1045 if ( zExp < 0 ) {
1046 isTiny =
1047 ( float_detect_tininess == float_tininess_before_rounding )
1048 || ( zExp < -1 )
1049 || ! increment
1050 || lt128(
1051 zSig0,
1052 zSig1,
1053 LIT64( 0x0001FFFFFFFFFFFF ),
1054 LIT64( 0xFFFFFFFFFFFFFFFF )
1055 );
1056 shift128ExtraRightJamming(
1057 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1058 zExp = 0;
1059 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1060 if ( roundNearestEven ) {
1061 increment = ( (sbits64) zSig2 < 0 );
1062 }
1063 else {
1064 if ( zSign ) {
1065 increment = ( roundingMode == float_round_down ) && zSig2;
1066 }
1067 else {
1068 increment = ( roundingMode == float_round_up ) && zSig2;
1069 }
1070 }
1071 }
1072 }
1073 if ( zSig2 ) float_set_inexact();
1074 if ( increment ) {
1075 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1076 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1077 }
1078 else {
1079 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1080 }
1081 return packFloat128( zSign, zExp, zSig0, zSig1 );
1082
1083 }
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 static float128
1097 normalizeRoundAndPackFloat128(
1098 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1099 {
1100 int8 shiftCount;
1101 bits64 zSig2;
1102
1103 if ( zSig0 == 0 ) {
1104 zSig0 = zSig1;
1105 zSig1 = 0;
1106 zExp -= 64;
1107 }
1108 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1109 if ( 0 <= shiftCount ) {
1110 zSig2 = 0;
1111 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1112 }
1113 else {
1114 shift128ExtraRightJamming(
1115 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1116 }
1117 zExp -= shiftCount;
1118 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1119
1120 }
1121
1122 #endif
1123
1124
1125
1126
1127
1128
1129
1130
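/*
 * int32_to_float32: convert a signed 32-bit integer to single precision
 * using the current rounding mode.  The value (sbits32) 0x80000000 is
 * special-cased because its magnitude cannot be represented after negation;
 * everything else is normalized and rounded.  For example, 0x7FFFFFFF does
 * not fit in a 24-bit significand and rounds (to nearest) to 2^31, i.e.
 * 0x4F000000, with the inexact flag set.
 */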
1131 float32 int32_to_float32( int32 a )
1132 {
1133 flag zSign;
1134
1135 if ( a == 0 ) return 0;
1136 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1137 zSign = ( a < 0 );
1138 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1139
1140 }
1141
1142
1143
1144
1145
1146
1147
1148
1149 float64 int32_to_float64( int32 a )
1150 {
1151 flag zSign;
1152 uint32 absA;
1153 int8 shiftCount;
1154 bits64 zSig;
1155
1156 if ( a == 0 ) return 0;
1157 zSign = ( a < 0 );
1158 absA = zSign ? - a : a;
1159 shiftCount = countLeadingZeros32( absA ) + 21;
1160 zSig = absA;
1161 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1162
1163 }
1164
1165 #ifdef FLOATX80
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175 floatx80 int32_to_floatx80( int32 a )
1176 {
1177 flag zSign;
1178 uint32 absA;
1179 int8 shiftCount;
1180 bits64 zSig;
1181
1182 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1183 zSign = ( a < 0 );
1184 absA = zSign ? - a : a;
1185 shiftCount = countLeadingZeros32( absA ) + 32;
1186 zSig = absA;
1187 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1188
1189 }
1190
1191 #endif
1192
1193 #ifdef FLOAT128
1194
1195
1196
1197
1198
1199
1200
1201
1202 float128 int32_to_float128( int32 a )
1203 {
1204 flag zSign;
1205 uint32 absA;
1206 int8 shiftCount;
1207 bits64 zSig0;
1208
1209 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1210 zSign = ( a < 0 );
1211 absA = zSign ? - a : a;
1212 shiftCount = countLeadingZeros32( absA ) + 17;
1213 zSig0 = absA;
1214 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1215
1216 }
1217
1218 #endif
1219
1220 #ifndef SOFTFLOAT_FOR_GCC
1221
1222
1223
1224
1225
1226
1227
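/*
 * int64_to_float32: a 64-bit integer can need more than the 24 significand
 * bits of a float32.  Magnitudes that fit in 24 bits are packed exactly;
 * larger ones are right-shifted with jamming (or left-shifted into rounding
 * position) and handed to roundAndPackFloat32.
 */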
1228 float32 int64_to_float32( int64 a )
1229 {
1230 flag zSign;
1231 uint64 absA;
1232 int8 shiftCount;
1233
1234 if ( a == 0 ) return 0;
1235 zSign = ( a < 0 );
1236 absA = zSign ? - a : a;
1237 shiftCount = countLeadingZeros64( absA ) - 40;
1238 if ( 0 <= shiftCount ) {
1239 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1240 }
1241 else {
1242 shiftCount += 7;
1243 if ( shiftCount < 0 ) {
1244 shift64RightJamming( absA, - shiftCount, &absA );
1245 }
1246 else {
1247 absA <<= shiftCount;
1248 }
1249 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1250 }
1251
1252 }
1253
1254
1255
1256
1257
1258
1259
1260
1261 float64 int64_to_float64( int64 a )
1262 {
1263 flag zSign;
1264
1265 if ( a == 0 ) return 0;
1266 if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1267 return packFloat64( 1, 0x43E, 0 );
1268 }
1269 zSign = ( a < 0 );
1270 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1271
1272 }
1273
1274 #ifdef FLOATX80
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284 floatx80 int64_to_floatx80( int64 a )
1285 {
1286 flag zSign;
1287 uint64 absA;
1288 int8 shiftCount;
1289
1290 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1291 zSign = ( a < 0 );
1292 absA = zSign ? - a : a;
1293 shiftCount = countLeadingZeros64( absA );
1294 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1295
1296 }
1297
1298 #endif
1299
1300 #ifdef FLOAT128
1301
1302
1303
1304
1305
1306
1307
1308
1309 float128 int64_to_float128( int64 a )
1310 {
1311 flag zSign;
1312 uint64 absA;
1313 int8 shiftCount;
1314 int32 zExp;
1315 bits64 zSig0, zSig1;
1316
1317 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1318 zSign = ( a < 0 );
1319 absA = zSign ? - a : a;
1320 shiftCount = countLeadingZeros64( absA ) + 49;
1321 zExp = 0x406E - shiftCount;
1322 if ( 64 <= shiftCount ) {
1323 zSig1 = 0;
1324 zSig0 = absA;
1325 shiftCount -= 64;
1326 }
1327 else {
1328 zSig1 = absA;
1329 zSig0 = 0;
1330 }
1331 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1332 return packFloat128( zSign, zExp, zSig0, zSig1 );
1333
1334 }
1335
1336 #endif
1337 #endif
1338
1339 #ifndef SOFTFLOAT_FOR_GCC
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351 int32 float32_to_int32( float32 a )
1352 {
1353 flag aSign;
1354 int16 aExp, shiftCount;
1355 bits32 aSig;
1356 bits64 aSig64;
1357
1358 aSig = extractFloat32Frac( a );
1359 aExp = extractFloat32Exp( a );
1360 aSign = extractFloat32Sign( a );
1361 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1362 if ( aExp ) aSig |= 0x00800000;
1363 shiftCount = 0xAF - aExp;
1364 aSig64 = aSig;
1365 aSig64 <<= 32;
1366 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1367 return roundAndPackInt32( aSign, aSig64 );
1368
1369 }
1370 #endif
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
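/*
 * float32_to_int32_round_to_zero: convert to a 32-bit integer, truncating
 * toward zero regardless of the current rounding mode (the behaviour a C
 * cast requires).  Out-of-range values, infinities and NaNs raise the
 * invalid exception and return 0x7FFFFFFF or (sbits32) 0x80000000;
 * magnitudes below 1 return 0, setting the inexact flag if non-zero.
 */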
1383 int32 float32_to_int32_round_to_zero( float32 a )
1384 {
1385 flag aSign;
1386 int16 aExp, shiftCount;
1387 bits32 aSig;
1388 int32 z;
1389
1390 aSig = extractFloat32Frac( a );
1391 aExp = extractFloat32Exp( a );
1392 aSign = extractFloat32Sign( a );
1393 shiftCount = aExp - 0x9E;
1394 if ( 0 <= shiftCount ) {
1395 if ( a != 0xCF000000 ) {
1396 float_raise( float_flag_invalid );
1397 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1398 }
1399 return (sbits32) 0x80000000;
1400 }
1401 else if ( aExp <= 0x7E ) {
1402 if ( aExp | aSig ) float_set_inexact();
1403 return 0;
1404 }
1405 aSig = ( aSig | 0x00800000 )<<8;
1406 z = aSig>>( - shiftCount );
1407 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1408 float_set_inexact();
1409 }
1410 if ( aSign ) z = - z;
1411 return z;
1412
1413 }
1414
1415 #ifndef SOFTFLOAT_FOR_GCC
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427 int64 float32_to_int64( float32 a )
1428 {
1429 flag aSign;
1430 int16 aExp, shiftCount;
1431 bits32 aSig;
1432 bits64 aSig64, aSigExtra;
1433
1434 aSig = extractFloat32Frac( a );
1435 aExp = extractFloat32Exp( a );
1436 aSign = extractFloat32Sign( a );
1437 shiftCount = 0xBE - aExp;
1438 if ( shiftCount < 0 ) {
1439 float_raise( float_flag_invalid );
1440 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1441 return LIT64( 0x7FFFFFFFFFFFFFFF );
1442 }
1443 return (sbits64) LIT64( 0x8000000000000000 );
1444 }
1445 if ( aExp ) aSig |= 0x00800000;
1446 aSig64 = aSig;
1447 aSig64 <<= 40;
1448 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1449 return roundAndPackInt64( aSign, aSig64, aSigExtra );
1450
1451 }
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464 int64 float32_to_int64_round_to_zero( float32 a )
1465 {
1466 flag aSign;
1467 int16 aExp, shiftCount;
1468 bits32 aSig;
1469 bits64 aSig64;
1470 int64 z;
1471
1472 aSig = extractFloat32Frac( a );
1473 aExp = extractFloat32Exp( a );
1474 aSign = extractFloat32Sign( a );
1475 shiftCount = aExp - 0xBE;
1476 if ( 0 <= shiftCount ) {
1477 if ( a != 0xDF000000 ) {
1478 float_raise( float_flag_invalid );
1479 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1480 return LIT64( 0x7FFFFFFFFFFFFFFF );
1481 }
1482 }
1483 return (sbits64) LIT64( 0x8000000000000000 );
1484 }
1485 else if ( aExp <= 0x7E ) {
1486 if ( aExp | aSig ) float_set_inexact();
1487 return 0;
1488 }
1489 aSig64 = aSig | 0x00800000;
1490 aSig64 <<= 40;
1491 z = aSig64>>( - shiftCount );
1492 if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1493 float_set_inexact();
1494 }
1495 if ( aSign ) z = - z;
1496 return z;
1497
1498 }
1499 #endif
1500
1501
1502
1503
1504
1505
1506
1507
1508
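/*
 * float32_to_float64: widening is always exact, so no rounding is needed.
 * NaNs go through the canonical commonNaN form, subnormals are
 * renormalized, and ordinary values simply have the exponent rebased from
 * bias 127 to bias 1023 (hence the +0x380) and the fraction shifted into
 * the wider field.
 */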
1509 float64 float32_to_float64( float32 a )
1510 {
1511 flag aSign;
1512 int16 aExp;
1513 bits32 aSig;
1514
1515 aSig = extractFloat32Frac( a );
1516 aExp = extractFloat32Exp( a );
1517 aSign = extractFloat32Sign( a );
1518 if ( aExp == 0xFF ) {
1519 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1520 return packFloat64( aSign, 0x7FF, 0 );
1521 }
1522 if ( aExp == 0 ) {
1523 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1524 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1525 --aExp;
1526 }
1527 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1528
1529 }
1530
1531 #ifdef FLOATX80
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541 floatx80 float32_to_floatx80( float32 a )
1542 {
1543 flag aSign;
1544 int16 aExp;
1545 bits32 aSig;
1546
1547 aSig = extractFloat32Frac( a );
1548 aExp = extractFloat32Exp( a );
1549 aSign = extractFloat32Sign( a );
1550 if ( aExp == 0xFF ) {
1551 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1552 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1553 }
1554 if ( aExp == 0 ) {
1555 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1556 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1557 }
1558 aSig |= 0x00800000;
1559 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1560
1561 }
1562
1563 #endif
1564
1565 #ifdef FLOAT128
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575 float128 float32_to_float128( float32 a )
1576 {
1577 flag aSign;
1578 int16 aExp;
1579 bits32 aSig;
1580
1581 aSig = extractFloat32Frac( a );
1582 aExp = extractFloat32Exp( a );
1583 aSign = extractFloat32Sign( a );
1584 if ( aExp == 0xFF ) {
1585 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1586 return packFloat128( aSign, 0x7FFF, 0, 0 );
1587 }
1588 if ( aExp == 0 ) {
1589 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1590 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1591 --aExp;
1592 }
1593 return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1594
1595 }
1596
1597 #endif
1598
1599 #ifndef SOFTFLOAT_FOR_GCC
1600
1601
1602
1603
1604
1605
1606
1607
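/*
 * float32_round_to_int: round to an integral value while remaining in
 * float32 format.  Exponents of 0x96 or more mean the value is already
 * integral (or a NaN); magnitudes below 1 collapse to 0 or +/-1 according
 * to the rounding mode; everything else has its fractional bits rounded
 * away directly on the raw encoding, with `lastBitMask' marking the units
 * bit.
 */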
1608 float32 float32_round_to_int( float32 a )
1609 {
1610 flag aSign;
1611 int16 aExp;
1612 bits32 lastBitMask, roundBitsMask;
1613 int8 roundingMode;
1614 float32 z;
1615
1616 aExp = extractFloat32Exp( a );
1617 if ( 0x96 <= aExp ) {
1618 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1619 return propagateFloat32NaN( a, a );
1620 }
1621 return a;
1622 }
1623 if ( aExp <= 0x7E ) {
1624 if ( (bits32) ( a<<1 ) == 0 ) return a;
1625 float_set_inexact();
1626 aSign = extractFloat32Sign( a );
1627 switch ( float_rounding_mode() ) {
1628 case float_round_nearest_even:
1629 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1630 return packFloat32( aSign, 0x7F, 0 );
1631 }
1632 break;
1633 case float_round_down:
1634 return aSign ? 0xBF800000 : 0;
1635 case float_round_up:
1636 return aSign ? 0x80000000 : 0x3F800000;
1637 }
1638 return packFloat32( aSign, 0, 0 );
1639 }
1640 lastBitMask = 1;
1641 lastBitMask <<= 0x96 - aExp;
1642 roundBitsMask = lastBitMask - 1;
1643 z = a;
1644 roundingMode = float_rounding_mode();
1645 if ( roundingMode == float_round_nearest_even ) {
1646 z += lastBitMask>>1;
1647 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1648 }
1649 else if ( roundingMode != float_round_to_zero ) {
1650 if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1651 z += roundBitsMask;
1652 }
1653 }
1654 z &= ~ roundBitsMask;
1655 if ( z != a ) float_set_inexact();
1656 return z;
1657
1658 }
1659 #endif
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
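/*
 * addFloat32Sigs: add the magnitudes of `a' and `b' and attach the sign
 * `zSign'; float32_add and float32_sub dispatch between this routine and
 * subFloat32Sigs based on the operand signs.  The significands are
 * pre-shifted left by 6 so the smaller operand can be aligned with
 * shift32RightJamming without losing sticky information before the sum
 * goes to roundAndPackFloat32.
 */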
1670 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1671 {
1672 int16 aExp, bExp, zExp;
1673 bits32 aSig, bSig, zSig;
1674 int16 expDiff;
1675
1676 aSig = extractFloat32Frac( a );
1677 aExp = extractFloat32Exp( a );
1678 bSig = extractFloat32Frac( b );
1679 bExp = extractFloat32Exp( b );
1680 expDiff = aExp - bExp;
1681 aSig <<= 6;
1682 bSig <<= 6;
1683 if ( 0 < expDiff ) {
1684 if ( aExp == 0xFF ) {
1685 if ( aSig ) return propagateFloat32NaN( a, b );
1686 return a;
1687 }
1688 if ( bExp == 0 ) {
1689 --expDiff;
1690 }
1691 else {
1692 bSig |= 0x20000000;
1693 }
1694 shift32RightJamming( bSig, expDiff, &bSig );
1695 zExp = aExp;
1696 }
1697 else if ( expDiff < 0 ) {
1698 if ( bExp == 0xFF ) {
1699 if ( bSig ) return propagateFloat32NaN( a, b );
1700 return packFloat32( zSign, 0xFF, 0 );
1701 }
1702 if ( aExp == 0 ) {
1703 ++expDiff;
1704 }
1705 else {
1706 aSig |= 0x20000000;
1707 }
1708 shift32RightJamming( aSig, - expDiff, &aSig );
1709 zExp = bExp;
1710 }
1711 else {
1712 if ( aExp == 0xFF ) {
1713 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1714 return a;
1715 }
1716 if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1717 zSig = 0x40000000 + aSig + bSig;
1718 zExp = aExp;
1719 goto roundAndPack;
1720 }
1721 aSig |= 0x20000000;
1722 zSig = ( aSig + bSig )<<1;
1723 --zExp;
1724 if ( (sbits32) zSig < 0 ) {
1725 zSig = aSig + bSig;
1726 ++zExp;
1727 }
1728 roundAndPack:
1729 return roundAndPackFloat32( zSign, zExp, zSig );
1730
1731 }
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
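/*
 * subFloat32Sigs: subtract the magnitude of `b' from that of `a', flipping
 * the result sign when `b' turns out to be the larger operand.  An exact
 * zero result takes the sign dictated by the rounding mode (negative only
 * in round-down), and cancellation is repaired by
 * normalizeRoundAndPackFloat32.
 */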
1742 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1743 {
1744 int16 aExp, bExp, zExp;
1745 bits32 aSig, bSig, zSig;
1746 int16 expDiff;
1747
1748 aSig = extractFloat32Frac( a );
1749 aExp = extractFloat32Exp( a );
1750 bSig = extractFloat32Frac( b );
1751 bExp = extractFloat32Exp( b );
1752 expDiff = aExp - bExp;
1753 aSig <<= 7;
1754 bSig <<= 7;
1755 if ( 0 < expDiff ) goto aExpBigger;
1756 if ( expDiff < 0 ) goto bExpBigger;
1757 if ( aExp == 0xFF ) {
1758 if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1759 float_raise( float_flag_invalid );
1760 return float32_default_nan;
1761 }
1762 if ( aExp == 0 ) {
1763 aExp = 1;
1764 bExp = 1;
1765 }
1766 if ( bSig < aSig ) goto aBigger;
1767 if ( aSig < bSig ) goto bBigger;
1768 return packFloat32( float_rounding_mode() == float_round_down, 0, 0 );
1769 bExpBigger:
1770 if ( bExp == 0xFF ) {
1771 if ( bSig ) return propagateFloat32NaN( a, b );
1772 return packFloat32( zSign ^ 1, 0xFF, 0 );
1773 }
1774 if ( aExp == 0 ) {
1775 ++expDiff;
1776 }
1777 else {
1778 aSig |= 0x40000000;
1779 }
1780 shift32RightJamming( aSig, - expDiff, &aSig );
1781 bSig |= 0x40000000;
1782 bBigger:
1783 zSig = bSig - aSig;
1784 zExp = bExp;
1785 zSign ^= 1;
1786 goto normalizeRoundAndPack;
1787 aExpBigger:
1788 if ( aExp == 0xFF ) {
1789 if ( aSig ) return propagateFloat32NaN( a, b );
1790 return a;
1791 }
1792 if ( bExp == 0 ) {
1793 --expDiff;
1794 }
1795 else {
1796 bSig |= 0x40000000;
1797 }
1798 shift32RightJamming( bSig, expDiff, &bSig );
1799 aSig |= 0x40000000;
1800 aBigger:
1801 zSig = aSig - bSig;
1802 zExp = aExp;
1803 normalizeRoundAndPack:
1804 --zExp;
1805 return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1806
1807 }
1808
1809
1810
1811
1812
1813
1814
1815
1816 float32 float32_add( float32 a, float32 b )
1817 {
1818 flag aSign, bSign;
1819
1820 aSign = extractFloat32Sign( a );
1821 bSign = extractFloat32Sign( b );
1822 if ( aSign == bSign ) {
1823 return addFloat32Sigs( a, b, aSign );
1824 }
1825 else {
1826 return subFloat32Sigs( a, b, aSign );
1827 }
1828
1829 }
1830
1831
1832
1833
1834
1835
1836
1837
1838 float32 float32_sub( float32 a, float32 b )
1839 {
1840 flag aSign, bSign;
1841
1842 aSign = extractFloat32Sign( a );
1843 bSign = extractFloat32Sign( b );
1844 if ( aSign == bSign ) {
1845 return subFloat32Sigs( a, b, aSign );
1846 }
1847 else {
1848 return addFloat32Sigs( a, b, aSign );
1849 }
1850
1851 }
1852
1853
1854
1855
1856
1857
1858
1859
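/*
 * float32_mul: after NaN, infinity and zero screening, the two significands
 * (with the implicit bit restored) are multiplied into a 64-bit product;
 * shift64RightJamming folds the discarded low half into a sticky bit, one
 * optional shift renormalizes, and roundAndPackFloat32 finishes.  Infinity
 * times zero is invalid and returns the default NaN.
 */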
1860 float32 float32_mul( float32 a, float32 b )
1861 {
1862 flag aSign, bSign, zSign;
1863 int16 aExp, bExp, zExp;
1864 bits32 aSig, bSig;
1865 bits64 zSig64;
1866 bits32 zSig;
1867
1868 aSig = extractFloat32Frac( a );
1869 aExp = extractFloat32Exp( a );
1870 aSign = extractFloat32Sign( a );
1871 bSig = extractFloat32Frac( b );
1872 bExp = extractFloat32Exp( b );
1873 bSign = extractFloat32Sign( b );
1874 zSign = aSign ^ bSign;
1875 if ( aExp == 0xFF ) {
1876 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1877 return propagateFloat32NaN( a, b );
1878 }
1879 if ( ( bExp | bSig ) == 0 ) {
1880 float_raise( float_flag_invalid );
1881 return float32_default_nan;
1882 }
1883 return packFloat32( zSign, 0xFF, 0 );
1884 }
1885 if ( bExp == 0xFF ) {
1886 if ( bSig ) return propagateFloat32NaN( a, b );
1887 if ( ( aExp | aSig ) == 0 ) {
1888 float_raise( float_flag_invalid );
1889 return float32_default_nan;
1890 }
1891 return packFloat32( zSign, 0xFF, 0 );
1892 }
1893 if ( aExp == 0 ) {
1894 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1895 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1896 }
1897 if ( bExp == 0 ) {
1898 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1899 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1900 }
1901 zExp = aExp + bExp - 0x7F;
1902 aSig = ( aSig | 0x00800000 )<<7;
1903 bSig = ( bSig | 0x00800000 )<<8;
1904 shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1905 zSig = zSig64;
1906 if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1907 zSig <<= 1;
1908 --zExp;
1909 }
1910 return roundAndPackFloat32( zSign, zExp, zSig );
1911
1912 }
1913
1914
1915
1916
1917
1918
1919
1920
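/*
 * float32_div: the quotient comes from a 64-by-32 bit integer division of
 * the aligned significands; when the low 6 quotient bits are all zero the
 * remainder is re-checked so the sticky bit still records inexact results.
 * 0/0 and inf/inf are invalid, while a finite non-zero value divided by
 * zero raises divide-by-zero and returns infinity.
 */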
1921 float32 float32_div( float32 a, float32 b )
1922 {
1923 flag aSign, bSign, zSign;
1924 int16 aExp, bExp, zExp;
1925 bits32 aSig, bSig, zSig;
1926
1927 aSig = extractFloat32Frac( a );
1928 aExp = extractFloat32Exp( a );
1929 aSign = extractFloat32Sign( a );
1930 bSig = extractFloat32Frac( b );
1931 bExp = extractFloat32Exp( b );
1932 bSign = extractFloat32Sign( b );
1933 zSign = aSign ^ bSign;
1934 if ( aExp == 0xFF ) {
1935 if ( aSig ) return propagateFloat32NaN( a, b );
1936 if ( bExp == 0xFF ) {
1937 if ( bSig ) return propagateFloat32NaN( a, b );
1938 float_raise( float_flag_invalid );
1939 return float32_default_nan;
1940 }
1941 return packFloat32( zSign, 0xFF, 0 );
1942 }
1943 if ( bExp == 0xFF ) {
1944 if ( bSig ) return propagateFloat32NaN( a, b );
1945 return packFloat32( zSign, 0, 0 );
1946 }
1947 if ( bExp == 0 ) {
1948 if ( bSig == 0 ) {
1949 if ( ( aExp | aSig ) == 0 ) {
1950 float_raise( float_flag_invalid );
1951 return float32_default_nan;
1952 }
1953 float_raise( float_flag_divbyzero );
1954 return packFloat32( zSign, 0xFF, 0 );
1955 }
1956 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1957 }
1958 if ( aExp == 0 ) {
1959 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1960 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1961 }
1962 zExp = aExp - bExp + 0x7D;
1963 aSig = ( aSig | 0x00800000 )<<7;
1964 bSig = ( bSig | 0x00800000 )<<8;
1965 if ( bSig <= ( aSig + aSig ) ) {
1966 aSig >>= 1;
1967 ++zExp;
1968 }
1969 zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1970 if ( ( zSig & 0x3F ) == 0 ) {
1971 zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1972 }
1973 return roundAndPackFloat32( zSign, zExp, zSig );
1974
1975 }
1976
1977 #ifndef SOFTFLOAT_FOR_GCC
1978
1979
1980
1981
1982
1983
1984
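/*
 * float32_rem: IEEE remainder, i.e. a - n*b with n the integer nearest a/b
 * (ties to even), which differs from C's fmodf truncation.  Small exponent
 * differences are handled in 32-bit arithmetic, large ones loop with
 * estimateDiv128To64, and the final do/while picks between the last two
 * candidate remainders so the result has the smaller magnitude.
 */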
1985 float32 float32_rem( float32 a, float32 b )
1986 {
1987 flag aSign, bSign, zSign;
1988 int16 aExp, bExp, expDiff;
1989 bits32 aSig, bSig;
1990 bits32 q;
1991 bits64 aSig64, bSig64, q64;
1992 bits32 alternateASig;
1993 sbits32 sigMean;
1994
1995 aSig = extractFloat32Frac( a );
1996 aExp = extractFloat32Exp( a );
1997 aSign = extractFloat32Sign( a );
1998 bSig = extractFloat32Frac( b );
1999 bExp = extractFloat32Exp( b );
2000 bSign = extractFloat32Sign( b );
2001 if ( aExp == 0xFF ) {
2002 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2003 return propagateFloat32NaN( a, b );
2004 }
2005 float_raise( float_flag_invalid );
2006 return float32_default_nan;
2007 }
2008 if ( bExp == 0xFF ) {
2009 if ( bSig ) return propagateFloat32NaN( a, b );
2010 return a;
2011 }
2012 if ( bExp == 0 ) {
2013 if ( bSig == 0 ) {
2014 float_raise( float_flag_invalid );
2015 return float32_default_nan;
2016 }
2017 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2018 }
2019 if ( aExp == 0 ) {
2020 if ( aSig == 0 ) return a;
2021 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2022 }
2023 expDiff = aExp - bExp;
2024 aSig |= 0x00800000;
2025 bSig |= 0x00800000;
2026 if ( expDiff < 32 ) {
2027 aSig <<= 8;
2028 bSig <<= 8;
2029 if ( expDiff < 0 ) {
2030 if ( expDiff < -1 ) return a;
2031 aSig >>= 1;
2032 }
2033 q = ( bSig <= aSig );
2034 if ( q ) aSig -= bSig;
2035 if ( 0 < expDiff ) {
2036 q = ( ( (bits64) aSig )<<32 ) / bSig;
2037 q >>= 32 - expDiff;
2038 bSig >>= 2;
2039 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2040 }
2041 else {
2042 aSig >>= 2;
2043 bSig >>= 2;
2044 }
2045 }
2046 else {
2047 if ( bSig <= aSig ) aSig -= bSig;
2048 aSig64 = ( (bits64) aSig )<<40;
2049 bSig64 = ( (bits64) bSig )<<40;
2050 expDiff -= 64;
2051 while ( 0 < expDiff ) {
2052 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2053 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2054 aSig64 = - ( ( bSig * q64 )<<38 );
2055 expDiff -= 62;
2056 }
2057 expDiff += 64;
2058 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2059 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2060 q = q64>>( 64 - expDiff );
2061 bSig <<= 6;
2062 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2063 }
2064 do {
2065 alternateASig = aSig;
2066 ++q;
2067 aSig -= bSig;
2068 } while ( 0 <= (sbits32) aSig );
2069 sigMean = aSig + alternateASig;
2070 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2071 aSig = alternateASig;
2072 }
2073 zSign = ( (sbits32) aSig < 0 );
2074 if ( zSign ) aSig = - aSig;
2075 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2076
2077 }
2078 #endif
2079
2080 #ifndef SOFTFLOAT_FOR_GCC
2081
2082
2083
2084
2085
2086
2087
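/*
 * float32_sqrt: estimateSqrt32 supplies a first approximation of the root,
 * which is corrected by comparing its square with the operand and OR-ing a
 * sticky bit for any remainder.  The square root of a negative non-zero
 * value is invalid and yields the default NaN; sqrt(-0) is -0.
 */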
2088 float32 float32_sqrt( float32 a )
2089 {
2090 flag aSign;
2091 int16 aExp, zExp;
2092 bits32 aSig, zSig;
2093 bits64 rem, term;
2094
2095 aSig = extractFloat32Frac( a );
2096 aExp = extractFloat32Exp( a );
2097 aSign = extractFloat32Sign( a );
2098 if ( aExp == 0xFF ) {
2099 if ( aSig ) return propagateFloat32NaN( a, 0 );
2100 if ( ! aSign ) return a;
2101 float_raise( float_flag_invalid );
2102 return float32_default_nan;
2103 }
2104 if ( aSign ) {
2105 if ( ( aExp | aSig ) == 0 ) return a;
2106 float_raise( float_flag_invalid );
2107 return float32_default_nan;
2108 }
2109 if ( aExp == 0 ) {
2110 if ( aSig == 0 ) return 0;
2111 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2112 }
2113 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2114 aSig = ( aSig | 0x00800000 )<<8;
2115 zSig = estimateSqrt32( aExp, aSig ) + 2;
2116 if ( ( zSig & 0x7F ) <= 5 ) {
2117 if ( zSig < 2 ) {
2118 zSig = 0x7FFFFFFF;
2119 goto roundAndPack;
2120 }
2121 aSig >>= aExp & 1;
2122 term = ( (bits64) zSig ) * zSig;
2123 rem = ( ( (bits64) aSig )<<32 ) - term;
2124 while ( (sbits64) rem < 0 ) {
2125 --zSig;
2126 rem += ( ( (bits64) zSig )<<1 ) | 1;
2127 }
2128 zSig |= ( rem != 0 );
2129 }
2130 shift32RightJamming( zSig, 1, &zSig );
2131 roundAndPack:
2132 return roundAndPackFloat32( 0, zExp, zSig );
2133
2134 }
2135 #endif
2136
2137
2138
2139
2140
2141
2142
2143
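/*
 * The comparison routines below follow the IEC/IEEE rules: a NaN operand
 * makes the relation unordered, so float32_eq returns 0 (raising invalid
 * only for signaling NaNs) while float32_le and float32_lt raise invalid
 * for any NaN.  The `(bits32) ( ( a | b )<<1 ) == 0' test treats +0 and -0
 * as equal.
 */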
2144 flag float32_eq( float32 a, float32 b )
2145 {
2146
2147 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2148 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2149 ) {
2150 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2151 float_raise( float_flag_invalid );
2152 }
2153 return 0;
2154 }
2155 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2156
2157 }
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167 flag float32_le( float32 a, float32 b )
2168 {
2169 flag aSign, bSign;
2170
2171 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2172 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2173 ) {
2174 float_raise( float_flag_invalid );
2175 return 0;
2176 }
2177 aSign = extractFloat32Sign( a );
2178 bSign = extractFloat32Sign( b );
2179 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2180 return ( a == b ) || ( aSign ^ ( a < b ) );
2181
2182 }
2183
2184
2185
2186
2187
2188
2189
2190
2191 flag float32_lt( float32 a, float32 b )
2192 {
2193 flag aSign, bSign;
2194
2195 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2196 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2197 ) {
2198 float_raise( float_flag_invalid );
2199 return 0;
2200 }
2201 aSign = extractFloat32Sign( a );
2202 bSign = extractFloat32Sign( b );
2203 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2204 return ( a != b ) && ( aSign ^ ( a < b ) );
2205
2206 }
2207
2208 #ifndef SOFTFLOAT_FOR_GCC
2209
2210
2211
2212
2213
2214
2215
2216
2217 flag float32_eq_signaling( float32 a, float32 b )
2218 {
2219
2220 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2221 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2222 ) {
2223 float_raise( float_flag_invalid );
2224 return 0;
2225 }
2226 return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2227
2228 }
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238 flag float32_le_quiet( float32 a, float32 b )
2239 {
2240 flag aSign, bSign;
2241
2242 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2243 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2244 ) {
2245 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2246 float_raise( float_flag_invalid );
2247 }
2248 return 0;
2249 }
2250 aSign = extractFloat32Sign( a );
2251 bSign = extractFloat32Sign( b );
2252 if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2253 return ( a == b ) || ( aSign ^ ( a < b ) );
2254
2255 }
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265 flag float32_lt_quiet( float32 a, float32 b )
2266 {
2267 flag aSign, bSign;
2268
2269 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2270 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2271 ) {
2272 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2273 float_raise( float_flag_invalid );
2274 }
2275 return 0;
2276 }
2277 aSign = extractFloat32Sign( a );
2278 bSign = extractFloat32Sign( b );
2279 if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2280 return ( a != b ) && ( aSign ^ ( a < b ) );
2281
2282 }
2283 #endif
2284
2285 #ifndef SOFTFLOAT_FOR_GCC
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297 int32 float64_to_int32( float64 a )
2298 {
2299 flag aSign;
2300 int16 aExp, shiftCount;
2301 bits64 aSig;
2302
2303 aSig = extractFloat64Frac( a );
2304 aExp = extractFloat64Exp( a );
2305 aSign = extractFloat64Sign( a );
2306 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2307 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2308 shiftCount = 0x42C - aExp;
2309 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2310 return roundAndPackInt32( aSign, aSig );
2311
2312 }
2313 #endif
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
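/*
 * float64_to_int32_round_to_zero: every int32 is exactly representable in a
 * double, so truncation is a plain shift of the significand by
 * 0x433 - aExp; shifting back and comparing with the saved significand
 * decides whether to set inexact.  Out-of-range values, infinities and NaNs
 * raise invalid and return the extreme integer of the appropriate sign.
 */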
2326 int32 float64_to_int32_round_to_zero( float64 a )
2327 {
2328 flag aSign;
2329 int16 aExp, shiftCount;
2330 bits64 aSig, savedASig;
2331 int32 z;
2332
2333 aSig = extractFloat64Frac( a );
2334 aExp = extractFloat64Exp( a );
2335 aSign = extractFloat64Sign( a );
2336 if ( 0x41E < aExp ) {
2337 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2338 goto invalid;
2339 }
2340 else if ( aExp < 0x3FF ) {
2341 if ( aExp || aSig ) float_set_inexact();
2342 return 0;
2343 }
2344 aSig |= LIT64( 0x0010000000000000 );
2345 shiftCount = 0x433 - aExp;
2346 savedASig = aSig;
2347 aSig >>= shiftCount;
2348 z = aSig;
2349 if ( aSign ) z = - z;
2350 if ( ( z < 0 ) ^ aSign ) {
2351 invalid:
2352 float_raise( float_flag_invalid );
2353 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2354 }
2355 if ( ( aSig<<shiftCount ) != savedASig ) {
2356 float_set_inexact();
2357 }
2358 return z;
2359
2360 }
2361
2362 #ifndef SOFTFLOAT_FOR_GCC
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374 int64 float64_to_int64( float64 a )
2375 {
2376 flag aSign;
2377 int16 aExp, shiftCount;
2378 bits64 aSig, aSigExtra;
2379
2380 aSig = extractFloat64Frac( a );
2381 aExp = extractFloat64Exp( a );
2382 aSign = extractFloat64Sign( a );
2383 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2384 shiftCount = 0x433 - aExp;
2385 if ( shiftCount <= 0 ) {
2386 if ( 0x43E < aExp ) {
2387 float_raise( float_flag_invalid );
2388 if ( ! aSign
2389 || ( ( aExp == 0x7FF )
2390 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2391 ) {
2392 return LIT64( 0x7FFFFFFFFFFFFFFF );
2393 }
2394 return (sbits64) LIT64( 0x8000000000000000 );
2395 }
2396 aSigExtra = 0;
2397 aSig <<= - shiftCount;
2398 }
2399 else {
2400 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2401 }
2402 return roundAndPackInt64( aSign, aSig, aSigExtra );
2403
2404 }
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417 int64 float64_to_int64_round_to_zero( float64 a )
2418 {
2419 flag aSign;
2420 int16 aExp, shiftCount;
2421 bits64 aSig;
2422 int64 z;
2423
2424 aSig = extractFloat64Frac( a );
2425 aExp = extractFloat64Exp( a );
2426 aSign = extractFloat64Sign( a );
2427 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2428 shiftCount = aExp - 0x433;
2429 if ( 0 <= shiftCount ) {
2430 if ( 0x43E <= aExp ) {
2431 if ( a != LIT64( 0xC3E0000000000000 ) ) {
2432 float_raise( float_flag_invalid );
2433 if ( ! aSign
2434 || ( ( aExp == 0x7FF )
2435 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2436 ) {
2437 return LIT64( 0x7FFFFFFFFFFFFFFF );
2438 }
2439 }
2440 return (sbits64) LIT64( 0x8000000000000000 );
2441 }
2442 z = aSig<<shiftCount;
2443 }
2444 else {
2445 if ( aExp < 0x3FE ) {
2446 if ( aExp | aSig ) float_set_inexact();
2447 return 0;
2448 }
2449 z = aSig>>( - shiftCount );
2450 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2451 float_set_inexact();
2452 }
2453 }
2454 if ( aSign ) z = - z;
2455 return z;
2456
2457 }
2458 #endif
2459
2460
2461
2462
2463
2464
2465
2466
2467
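/*
 * float64_to_float32: narrowing conversion.  The 52-bit fraction is first
 * reduced to 30 bits with shift64RightJamming so roundAndPackFloat32 sees
 * its usual 7 rounding bits, and the exponent is rebased from bias 1023 to
 * bias 127; the adjustment is 0x381 rather than 0x380 because the explicit
 * leading bit set here is added back into the exponent by packFloat32.
 */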
2468 float32 float64_to_float32( float64 a )
2469 {
2470 flag aSign;
2471 int16 aExp;
2472 bits64 aSig;
2473 bits32 zSig;
2474
2475 aSig = extractFloat64Frac( a );
2476 aExp = extractFloat64Exp( a );
2477 aSign = extractFloat64Sign( a );
2478 if ( aExp == 0x7FF ) {
2479 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2480 return packFloat32( aSign, 0xFF, 0 );
2481 }
2482 shift64RightJamming( aSig, 22, &aSig );
2483 zSig = aSig;
2484 if ( aExp || zSig ) {
2485 zSig |= 0x40000000;
2486 aExp -= 0x381;
2487 }
2488 return roundAndPackFloat32( aSign, aExp, zSig );
2489
2490 }
2491
2492 #ifdef FLOATX80
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502 floatx80 float64_to_floatx80( float64 a )
2503 {
2504 flag aSign;
2505 int16 aExp;
2506 bits64 aSig;
2507
2508 aSig = extractFloat64Frac( a );
2509 aExp = extractFloat64Exp( a );
2510 aSign = extractFloat64Sign( a );
2511 if ( aExp == 0x7FF ) {
2512 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2513 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2514 }
2515 if ( aExp == 0 ) {
2516 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2517 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2518 }
2519 return
2520 packFloatx80(
2521 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2522
2523 }
2524
2525 #endif
2526
2527 #ifdef FLOAT128
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537 float128 float64_to_float128( float64 a )
2538 {
2539 flag aSign;
2540 int16 aExp;
2541 bits64 aSig, zSig0, zSig1;
2542
2543 aSig = extractFloat64Frac( a );
2544 aExp = extractFloat64Exp( a );
2545 aSign = extractFloat64Sign( a );
2546 if ( aExp == 0x7FF ) {
2547 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2548 return packFloat128( aSign, 0x7FFF, 0, 0 );
2549 }
2550 if ( aExp == 0 ) {
2551 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2552 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2553 --aExp;
2554 }
2555 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2556 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2557
2558 }
2559
2560 #endif
2561
2562 #ifndef SOFTFLOAT_FOR_GCC
2563
2564
2565
2566
2567
2568
2569
2570
2571 float64 float64_round_to_int( float64 a )
2572 {
2573 flag aSign;
2574 int16 aExp;
2575 bits64 lastBitMask, roundBitsMask;
2576 int8 roundingMode;
2577 float64 z;
2578
2579 aExp = extractFloat64Exp( a );
2580 if ( 0x433 <= aExp ) {
2581 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2582 return propagateFloat64NaN( a, a );
2583 }
2584 return a;
2585 }
2586 if ( aExp < 0x3FF ) {
2587 if ( (bits64) ( a<<1 ) == 0 ) return a;
2588 float_set_inexact();
2589 aSign = extractFloat64Sign( a );
2590 switch ( float_rounding_mode() ) {
2591 case float_round_nearest_even:
2592 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2593 return packFloat64( aSign, 0x3FF, 0 );
2594 }
2595 break;
2596 case float_round_down:
2597 return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2598 case float_round_up:
2599 return
2600 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2601 }
2602 return packFloat64( aSign, 0, 0 );
2603 }
2604 lastBitMask = 1;
2605 lastBitMask <<= 0x433 - aExp;
2606 roundBitsMask = lastBitMask - 1;
2607 z = a;
2608 roundingMode = float_rounding_mode();
2609 if ( roundingMode == float_round_nearest_even ) {
2610 z += lastBitMask>>1;
2611 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2612 }
2613 else if ( roundingMode != float_round_to_zero ) {
2614 if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2615 z += roundBitsMask;
2616 }
2617 }
2618 z &= ~ roundBitsMask;
2619 if ( z != a ) float_set_inexact();
2620 return z;
2621
2622 }
2623 #endif
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
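/*
 * addFloat64Sigs and subFloat64Sigs (below) are the double-precision twins
 * of the float32 routines above: the significands are pre-shifted left by
 * 9 and 10 bits respectively for alignment and cancellation, and the result
 * is finished by roundAndPackFloat64 or normalizeRoundAndPackFloat64.
 */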
2634 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2635 {
2636 int16 aExp, bExp, zExp;
2637 bits64 aSig, bSig, zSig;
2638 int16 expDiff;
2639
2640 aSig = extractFloat64Frac( a );
2641 aExp = extractFloat64Exp( a );
2642 bSig = extractFloat64Frac( b );
2643 bExp = extractFloat64Exp( b );
2644 expDiff = aExp - bExp;
2645 aSig <<= 9;
2646 bSig <<= 9;
2647 if ( 0 < expDiff ) {
2648 if ( aExp == 0x7FF ) {
2649 if ( aSig ) return propagateFloat64NaN( a, b );
2650 return a;
2651 }
2652 if ( bExp == 0 ) {
2653 --expDiff;
2654 }
2655 else {
2656 bSig |= LIT64( 0x2000000000000000 );
2657 }
2658 shift64RightJamming( bSig, expDiff, &bSig );
2659 zExp = aExp;
2660 }
2661 else if ( expDiff < 0 ) {
2662 if ( bExp == 0x7FF ) {
2663 if ( bSig ) return propagateFloat64NaN( a, b );
2664 return packFloat64( zSign, 0x7FF, 0 );
2665 }
2666 if ( aExp == 0 ) {
2667 ++expDiff;
2668 }
2669 else {
2670 aSig |= LIT64( 0x2000000000000000 );
2671 }
2672 shift64RightJamming( aSig, - expDiff, &aSig );
2673 zExp = bExp;
2674 }
2675 else {
2676 if ( aExp == 0x7FF ) {
2677 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2678 return a;
2679 }
2680 if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2681 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2682 zExp = aExp;
2683 goto roundAndPack;
2684 }
2685 aSig |= LIT64( 0x2000000000000000 );
2686 zSig = ( aSig + bSig )<<1;
2687 --zExp;
2688 if ( (sbits64) zSig < 0 ) {
2689 zSig = aSig + bSig;
2690 ++zExp;
2691 }
2692 roundAndPack:
2693 return roundAndPackFloat64( zSign, zExp, zSig );
2694
2695 }
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
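/*
 * Returns the result of subtracting the absolute values of the double-
 * precision floating-point values `a' and `b'.  If `zSign' is 1, the
 * difference is negated before being returned.  `zSign' is ignored if the
 * result is a NaN.
 */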
2706 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2707 {
2708 int16 aExp, bExp, zExp;
2709 bits64 aSig, bSig, zSig;
2710 int16 expDiff;
2711
2712 aSig = extractFloat64Frac( a );
2713 aExp = extractFloat64Exp( a );
2714 bSig = extractFloat64Frac( b );
2715 bExp = extractFloat64Exp( b );
2716 expDiff = aExp - bExp;
2717 aSig <<= 10;
2718 bSig <<= 10;
2719 if ( 0 < expDiff ) goto aExpBigger;
2720 if ( expDiff < 0 ) goto bExpBigger;
2721 if ( aExp == 0x7FF ) {
2722 if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2723 float_raise( float_flag_invalid );
2724 return float64_default_nan;
2725 }
2726 if ( aExp == 0 ) {
2727 aExp = 1;
2728 bExp = 1;
2729 }
2730 if ( bSig < aSig ) goto aBigger;
2731 if ( aSig < bSig ) goto bBigger;
2732 return packFloat64( float_rounding_mode() == float_round_down, 0, 0 );
2733 bExpBigger:
2734 if ( bExp == 0x7FF ) {
2735 if ( bSig ) return propagateFloat64NaN( a, b );
2736 return packFloat64( zSign ^ 1, 0x7FF, 0 );
2737 }
2738 if ( aExp == 0 ) {
2739 ++expDiff;
2740 }
2741 else {
2742 aSig |= LIT64( 0x4000000000000000 );
2743 }
2744 shift64RightJamming( aSig, - expDiff, &aSig );
2745 bSig |= LIT64( 0x4000000000000000 );
2746 bBigger:
2747 zSig = bSig - aSig;
2748 zExp = bExp;
2749 zSign ^= 1;
2750 goto normalizeRoundAndPack;
2751 aExpBigger:
2752 if ( aExp == 0x7FF ) {
2753 if ( aSig ) return propagateFloat64NaN( a, b );
2754 return a;
2755 }
2756 if ( bExp == 0 ) {
2757 --expDiff;
2758 }
2759 else {
2760 bSig |= LIT64( 0x4000000000000000 );
2761 }
2762 shift64RightJamming( bSig, expDiff, &bSig );
2763 aSig |= LIT64( 0x4000000000000000 );
2764 aBigger:
2765 zSig = aSig - bSig;
2766 zExp = aExp;
2767 normalizeRoundAndPack:
2768 --zExp;
2769 return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2770
2771 }
2772
2773
2774
2775
2776
2777
2778
2779
2780 float64 float64_add( float64 a, float64 b )
2781 {
2782 flag aSign, bSign;
2783
2784 aSign = extractFloat64Sign( a );
2785 bSign = extractFloat64Sign( b );
2786 if ( aSign == bSign ) {
2787 return addFloat64Sigs( a, b, aSign );
2788 }
2789 else {
2790 return subFloat64Sigs( a, b, aSign );
2791 }
2792
2793 }
2794
2795
2796
2797
2798
2799
2800
2801
2802 float64 float64_sub( float64 a, float64 b )
2803 {
2804 flag aSign, bSign;
2805
2806 aSign = extractFloat64Sign( a );
2807 bSign = extractFloat64Sign( b );
2808 if ( aSign == bSign ) {
2809 return subFloat64Sigs( a, b, aSign );
2810 }
2811 else {
2812 return addFloat64Sigs( a, b, aSign );
2813 }
2814
2815 }
2816
2817
2818
2819
2820
2821
2822
2823
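/*
 * Returns the result of multiplying the double-precision floating-point
 * values `a' and `b', rounded according to the current rounding mode.
 */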
2824 float64 float64_mul( float64 a, float64 b )
2825 {
2826 flag aSign, bSign, zSign;
2827 int16 aExp, bExp, zExp;
2828 bits64 aSig, bSig, zSig0, zSig1;
2829
2830 aSig = extractFloat64Frac( a );
2831 aExp = extractFloat64Exp( a );
2832 aSign = extractFloat64Sign( a );
2833 bSig = extractFloat64Frac( b );
2834 bExp = extractFloat64Exp( b );
2835 bSign = extractFloat64Sign( b );
2836 zSign = aSign ^ bSign;
2837 if ( aExp == 0x7FF ) {
2838 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2839 return propagateFloat64NaN( a, b );
2840 }
2841 if ( ( bExp | bSig ) == 0 ) {
2842 float_raise( float_flag_invalid );
2843 return float64_default_nan;
2844 }
2845 return packFloat64( zSign, 0x7FF, 0 );
2846 }
2847 if ( bExp == 0x7FF ) {
2848 if ( bSig ) return propagateFloat64NaN( a, b );
2849 if ( ( aExp | aSig ) == 0 ) {
2850 float_raise( float_flag_invalid );
2851 return float64_default_nan;
2852 }
2853 return packFloat64( zSign, 0x7FF, 0 );
2854 }
2855 if ( aExp == 0 ) {
2856 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2857 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2858 }
2859 if ( bExp == 0 ) {
2860 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2861 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2862 }
2863 zExp = aExp + bExp - 0x3FF;
2864 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2865 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2866 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2867 zSig0 |= ( zSig1 != 0 );
2868 if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2869 zSig0 <<= 1;
2870 --zExp;
2871 }
2872 return roundAndPackFloat64( zSign, zExp, zSig0 );
2873
2874 }
2875
2876
2877
2878
2879
2880
2881
2882
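/*
 * Returns the result of dividing the double-precision floating-point value
 * `a' by the corresponding value `b'.  Division of a finite nonzero value
 * by zero raises the divide-by-zero exception; 0/0 and inf/inf raise the
 * invalid exception and return the default NaN.
 */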
2883 float64 float64_div( float64 a, float64 b )
2884 {
2885 flag aSign, bSign, zSign;
2886 int16 aExp, bExp, zExp;
2887 bits64 aSig, bSig, zSig;
2888 bits64 rem0, rem1;
2889 bits64 term0, term1;
2890
2891 aSig = extractFloat64Frac( a );
2892 aExp = extractFloat64Exp( a );
2893 aSign = extractFloat64Sign( a );
2894 bSig = extractFloat64Frac( b );
2895 bExp = extractFloat64Exp( b );
2896 bSign = extractFloat64Sign( b );
2897 zSign = aSign ^ bSign;
2898 if ( aExp == 0x7FF ) {
2899 if ( aSig ) return propagateFloat64NaN( a, b );
2900 if ( bExp == 0x7FF ) {
2901 if ( bSig ) return propagateFloat64NaN( a, b );
2902 float_raise( float_flag_invalid );
2903 return float64_default_nan;
2904 }
2905 return packFloat64( zSign, 0x7FF, 0 );
2906 }
2907 if ( bExp == 0x7FF ) {
2908 if ( bSig ) return propagateFloat64NaN( a, b );
2909 return packFloat64( zSign, 0, 0 );
2910 }
2911 if ( bExp == 0 ) {
2912 if ( bSig == 0 ) {
2913 if ( ( aExp | aSig ) == 0 ) {
2914 float_raise( float_flag_invalid );
2915 return float64_default_nan;
2916 }
2917 float_raise( float_flag_divbyzero );
2918 return packFloat64( zSign, 0x7FF, 0 );
2919 }
2920 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2921 }
2922 if ( aExp == 0 ) {
2923 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2924 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2925 }
2926 zExp = aExp - bExp + 0x3FD;
2927 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2928 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2929 if ( bSig <= ( aSig + aSig ) ) {
2930 aSig >>= 1;
2931 ++zExp;
2932 }
2933 zSig = estimateDiv128To64( aSig, 0, bSig );
2934 if ( ( zSig & 0x1FF ) <= 2 ) {
2935 mul64To128( bSig, zSig, &term0, &term1 );
2936 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2937 while ( (sbits64) rem0 < 0 ) {
2938 --zSig;
2939 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2940 }
2941 zSig |= ( rem1 != 0 );
2942 }
2943 return roundAndPackFloat64( zSign, zExp, zSig );
2944
2945 }
2946
2947 #ifndef SOFTFLOAT_FOR_GCC
2948
2949
2950
2951
2952
2953
2954
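/*
 * Returns the remainder of the double-precision floating-point value `a'
 * with respect to the corresponding value `b', in the sense of the IEEE
 * remainder operation: the implicit quotient is rounded to the nearest
 * integer, with ties broken toward even.
 */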
2955 float64 float64_rem( float64 a, float64 b )
2956 {
2957 flag aSign, bSign, zSign;
2958 int16 aExp, bExp, expDiff;
2959 bits64 aSig, bSig;
2960 bits64 q, alternateASig;
2961 sbits64 sigMean;
2962
2963 aSig = extractFloat64Frac( a );
2964 aExp = extractFloat64Exp( a );
2965 aSign = extractFloat64Sign( a );
2966 bSig = extractFloat64Frac( b );
2967 bExp = extractFloat64Exp( b );
2968 bSign = extractFloat64Sign( b );
2969 if ( aExp == 0x7FF ) {
2970 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2971 return propagateFloat64NaN( a, b );
2972 }
2973 float_raise( float_flag_invalid );
2974 return float64_default_nan;
2975 }
2976 if ( bExp == 0x7FF ) {
2977 if ( bSig ) return propagateFloat64NaN( a, b );
2978 return a;
2979 }
2980 if ( bExp == 0 ) {
2981 if ( bSig == 0 ) {
2982 float_raise( float_flag_invalid );
2983 return float64_default_nan;
2984 }
2985 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2986 }
2987 if ( aExp == 0 ) {
2988 if ( aSig == 0 ) return a;
2989 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2990 }
2991 expDiff = aExp - bExp;
2992 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2993 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2994 if ( expDiff < 0 ) {
2995 if ( expDiff < -1 ) return a;
2996 aSig >>= 1;
2997 }
2998 q = ( bSig <= aSig );
2999 if ( q ) aSig -= bSig;
3000 expDiff -= 64;
3001 while ( 0 < expDiff ) {
3002 q = estimateDiv128To64( aSig, 0, bSig );
3003 q = ( 2 < q ) ? q - 2 : 0;
3004 aSig = - ( ( bSig>>2 ) * q );
3005 expDiff -= 62;
3006 }
3007 expDiff += 64;
3008 if ( 0 < expDiff ) {
3009 q = estimateDiv128To64( aSig, 0, bSig );
3010 q = ( 2 < q ) ? q - 2 : 0;
3011 q >>= 64 - expDiff;
3012 bSig >>= 2;
3013 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3014 }
3015 else {
3016 aSig >>= 2;
3017 bSig >>= 2;
3018 }
3019 do {
3020 alternateASig = aSig;
3021 ++q;
3022 aSig -= bSig;
3023 } while ( 0 <= (sbits64) aSig );
3024 sigMean = aSig + alternateASig;
3025 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3026 aSig = alternateASig;
3027 }
3028 zSign = ( (sbits64) aSig < 0 );
3029 if ( zSign ) aSig = - aSig;
3030 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3031
3032 }
3033
3034
3035
3036
3037
3038
3039
3040
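/*
 * Returns the square root of the double-precision floating-point value
 * `a'.  The invalid exception is raised, and the default NaN returned, if
 * `a' is negative and nonzero.
 */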
3041 float64 float64_sqrt( float64 a )
3042 {
3043 flag aSign;
3044 int16 aExp, zExp;
3045 bits64 aSig, zSig, doubleZSig;
3046 bits64 rem0, rem1, term0, term1;
3047
3048 aSig = extractFloat64Frac( a );
3049 aExp = extractFloat64Exp( a );
3050 aSign = extractFloat64Sign( a );
3051 if ( aExp == 0x7FF ) {
3052 if ( aSig ) return propagateFloat64NaN( a, a );
3053 if ( ! aSign ) return a;
3054 float_raise( float_flag_invalid );
3055 return float64_default_nan;
3056 }
3057 if ( aSign ) {
3058 if ( ( aExp | aSig ) == 0 ) return a;
3059 float_raise( float_flag_invalid );
3060 return float64_default_nan;
3061 }
3062 if ( aExp == 0 ) {
3063 if ( aSig == 0 ) return 0;
3064 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3065 }
3066 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3067 aSig |= LIT64( 0x0010000000000000 );
3068 zSig = estimateSqrt32( aExp, aSig>>21 );
3069 aSig <<= 9 - ( aExp & 1 );
3070 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3071 if ( ( zSig & 0x1FF ) <= 5 ) {
3072 doubleZSig = zSig<<1;
3073 mul64To128( zSig, zSig, &term0, &term1 );
3074 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3075 while ( (sbits64) rem0 < 0 ) {
3076 --zSig;
3077 doubleZSig -= 2;
3078 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3079 }
3080 zSig |= ( ( rem0 | rem1 ) != 0 );
3081 }
3082 return roundAndPackFloat64( 0, zExp, zSig );
3083
3084 }
3085 #endif
3086
3087
3088
3089
3090
3091
3092
3093
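/*
 * Returns 1 if the double-precision floating-point value `a' is equal to
 * the corresponding value `b', and 0 otherwise.  The comparison is quiet:
 * the invalid exception is raised only if either operand is a signaling
 * NaN.
 */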
3094 flag float64_eq( float64 a, float64 b )
3095 {
3096
3097 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3098 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3099 ) {
3100 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3101 float_raise( float_flag_invalid );
3102 }
3103 return 0;
3104 }
3105 return ( a == b ) ||
3106 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3107
3108 }
3109
3110
3111
3112
3113
3114
3115
3116
3117
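/*
 * Returns 1 if the double-precision floating-point value `a' is less than
 * or equal to the corresponding value `b', and 0 otherwise.  The invalid
 * exception is raised if either operand is a NaN, quiet or signaling.
 */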
3118 flag float64_le( float64 a, float64 b )
3119 {
3120 flag aSign, bSign;
3121
3122 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3123 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3124 ) {
3125 float_raise( float_flag_invalid );
3126 return 0;
3127 }
3128 aSign = extractFloat64Sign( a );
3129 bSign = extractFloat64Sign( b );
3130 if ( aSign != bSign )
3131 return aSign ||
3132 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3133 0 );
3134 return ( a == b ) ||
3135 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3136
3137 }
3138
3139
3140
3141
3142
3143
3144
3145
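/*
 * Returns 1 if the double-precision floating-point value `a' is less than
 * the corresponding value `b', and 0 otherwise.  The invalid exception is
 * raised if either operand is a NaN, quiet or signaling.
 */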
3146 flag float64_lt( float64 a, float64 b )
3147 {
3148 flag aSign, bSign;
3149
3150 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3151 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3152 ) {
3153 float_raise( float_flag_invalid );
3154 return 0;
3155 }
3156 aSign = extractFloat64Sign( a );
3157 bSign = extractFloat64Sign( b );
3158 if ( aSign != bSign )
3159 return aSign &&
3160 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3161 0 );
3162 return ( a != b ) &&
3163 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3164
3165 }
3166
3167 #ifndef SOFTFLOAT_FOR_GCC
3168
3169
3170
3171
3172
3173
3174
3175
3176 flag float64_eq_signaling( float64 a, float64 b )
3177 {
3178
3179 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3180 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3181 ) {
3182 float_raise( float_flag_invalid );
3183 return 0;
3184 }
3185 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3186
3187 }
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197 flag float64_le_quiet( float64 a, float64 b )
3198 {
3199 flag aSign, bSign;
3200
3201 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3202 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3203 ) {
3204 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3205 float_raise( float_flag_invalid );
3206 }
3207 return 0;
3208 }
3209 aSign = extractFloat64Sign( a );
3210 bSign = extractFloat64Sign( b );
3211 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3212 return ( a == b ) || ( aSign ^ ( a < b ) );
3213
3214 }
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224 flag float64_lt_quiet( float64 a, float64 b )
3225 {
3226 flag aSign, bSign;
3227
3228 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3229 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3230 ) {
3231 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3232 float_raise( float_flag_invalid );
3233 }
3234 return 0;
3235 }
3236 aSign = extractFloat64Sign( a );
3237 bSign = extractFloat64Sign( b );
3238 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3239 return ( a != b ) && ( aSign ^ ( a < b ) );
3240
3241 }
3242 #endif
3243
3244 #ifdef FLOATX80
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
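/*
 * Returns the result of converting the extended double-precision floating-
 * point value `a' to the 32-bit two's complement integer format, rounding
 * according to the current rounding mode.
 */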
3257 int32 floatx80_to_int32( floatx80 a )
3258 {
3259 flag aSign;
3260 int32 aExp, shiftCount;
3261 bits64 aSig;
3262
3263 aSig = extractFloatx80Frac( a );
3264 aExp = extractFloatx80Exp( a );
3265 aSign = extractFloatx80Sign( a );
3266 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3267 shiftCount = 0x4037 - aExp;
3268 if ( shiftCount <= 0 ) shiftCount = 1;
3269 shift64RightJamming( aSig, shiftCount, &aSig );
3270 return roundAndPackInt32( aSign, aSig );
3271
3272 }
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285 int32 floatx80_to_int32_round_to_zero( floatx80 a )
3286 {
3287 flag aSign;
3288 int32 aExp, shiftCount;
3289 bits64 aSig, savedASig;
3290 int32 z;
3291
3292 aSig = extractFloatx80Frac( a );
3293 aExp = extractFloatx80Exp( a );
3294 aSign = extractFloatx80Sign( a );
3295 if ( 0x401E < aExp ) {
3296 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3297 goto invalid;
3298 }
3299 else if ( aExp < 0x3FFF ) {
3300 if ( aExp || aSig ) float_set_inexact();
3301 return 0;
3302 }
3303 shiftCount = 0x403E - aExp;
3304 savedASig = aSig;
3305 aSig >>= shiftCount;
3306 z = aSig;
3307 if ( aSign ) z = - z;
3308 if ( ( z < 0 ) ^ aSign ) {
3309 invalid:
3310 float_raise( float_flag_invalid );
3311 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3312 }
3313 if ( ( aSig<<shiftCount ) != savedASig ) {
3314 float_set_inexact();
3315 }
3316 return z;
3317
3318 }
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
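/*
 * Returns the result of converting the extended double-precision floating-
 * point value `a' to the 64-bit two's complement integer format, rounding
 * according to the current rounding mode.  NaNs and values outside the
 * range of the format raise the invalid exception.
 */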
3331 int64 floatx80_to_int64( floatx80 a )
3332 {
3333 flag aSign;
3334 int32 aExp, shiftCount;
3335 bits64 aSig, aSigExtra;
3336
3337 aSig = extractFloatx80Frac( a );
3338 aExp = extractFloatx80Exp( a );
3339 aSign = extractFloatx80Sign( a );
3340 shiftCount = 0x403E - aExp;
3341 if ( shiftCount <= 0 ) {
3342 if ( shiftCount ) {
3343 float_raise( float_flag_invalid );
3344 if ( ! aSign
3345 || ( ( aExp == 0x7FFF )
3346 && ( aSig != LIT64( 0x8000000000000000 ) ) )
3347 ) {
3348 return LIT64( 0x7FFFFFFFFFFFFFFF );
3349 }
3350 return (sbits64) LIT64( 0x8000000000000000 );
3351 }
3352 aSigExtra = 0;
3353 }
3354 else {
3355 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3356 }
3357 return roundAndPackInt64( aSign, aSig, aSigExtra );
3358
3359 }
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372 int64 floatx80_to_int64_round_to_zero( floatx80 a )
3373 {
3374 flag aSign;
3375 int32 aExp, shiftCount;
3376 bits64 aSig;
3377 int64 z;
3378
3379 aSig = extractFloatx80Frac( a );
3380 aExp = extractFloatx80Exp( a );
3381 aSign = extractFloatx80Sign( a );
3382 shiftCount = aExp - 0x403E;
3383 if ( 0 <= shiftCount ) {
3384 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3385 if ( ( a.high != 0xC03E ) || aSig ) {
3386 float_raise( float_flag_invalid );
3387 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3388 return LIT64( 0x7FFFFFFFFFFFFFFF );
3389 }
3390 }
3391 return (sbits64) LIT64( 0x8000000000000000 );
3392 }
3393 else if ( aExp < 0x3FFF ) {
3394 if ( aExp | aSig ) float_set_inexact();
3395 return 0;
3396 }
3397 z = aSig>>( - shiftCount );
3398 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3399 float_set_inexact();
3400 }
3401 if ( aSign ) z = - z;
3402 return z;
3403
3404 }
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414 float32 floatx80_to_float32( floatx80 a )
3415 {
3416 flag aSign;
3417 int32 aExp;
3418 bits64 aSig;
3419
3420 aSig = extractFloatx80Frac( a );
3421 aExp = extractFloatx80Exp( a );
3422 aSign = extractFloatx80Sign( a );
3423 if ( aExp == 0x7FFF ) {
3424 if ( (bits64) ( aSig<<1 ) ) {
3425 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3426 }
3427 return packFloat32( aSign, 0xFF, 0 );
3428 }
3429 shift64RightJamming( aSig, 33, &aSig );
3430 if ( aExp || aSig ) aExp -= 0x3F81;
3431 return roundAndPackFloat32( aSign, aExp, aSig );
3432
3433 }
3434
3435
3436
3437
3438
3439
3440
3441
3442
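/*
 * Returns the result of converting the extended double-precision floating-
 * point value `a' to the double-precision floating-point format.
 */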
3443 float64 floatx80_to_float64( floatx80 a )
3444 {
3445 flag aSign;
3446 int32 aExp;
3447 bits64 aSig, zSig;
3448
3449 aSig = extractFloatx80Frac( a );
3450 aExp = extractFloatx80Exp( a );
3451 aSign = extractFloatx80Sign( a );
3452 if ( aExp == 0x7FFF ) {
3453 if ( (bits64) ( aSig<<1 ) ) {
3454 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3455 }
3456 return packFloat64( aSign, 0x7FF, 0 );
3457 }
3458 shift64RightJamming( aSig, 1, &zSig );
3459 if ( aExp || aSig ) aExp -= 0x3C01;
3460 return roundAndPackFloat64( aSign, aExp, zSig );
3461
3462 }
3463
3464 #ifdef FLOAT128
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474 float128 floatx80_to_float128( floatx80 a )
3475 {
3476 flag aSign;
3477 int16 aExp;
3478 bits64 aSig, zSig0, zSig1;
3479
3480 aSig = extractFloatx80Frac( a );
3481 aExp = extractFloatx80Exp( a );
3482 aSign = extractFloatx80Sign( a );
3483 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3484 return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3485 }
3486 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3487 return packFloat128( aSign, aExp, zSig0, zSig1 );
3488
3489 }
3490
3491 #endif
3492
3493
3494
3495
3496
3497
3498
3499
3500
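/*
 * Rounds the extended double-precision floating-point value `a' to an
 * integer, and returns the result as an extended double-precision value.
 * The rounding is performed according to the current rounding mode.
 */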
3501 floatx80 floatx80_round_to_int( floatx80 a )
3502 {
3503 flag aSign;
3504 int32 aExp;
3505 bits64 lastBitMask, roundBitsMask;
3506 int8 roundingMode;
3507 floatx80 z;
3508
3509 aExp = extractFloatx80Exp( a );
3510 if ( 0x403E <= aExp ) {
3511 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3512 return propagateFloatx80NaN( a, a );
3513 }
3514 return a;
3515 }
3516 if ( aExp < 0x3FFF ) {
3517 if ( ( aExp == 0 )
3518 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3519 return a;
3520 }
3521 float_set_inexact();
3522 aSign = extractFloatx80Sign( a );
3523 switch ( float_rounding_mode() ) {
3524 case float_round_nearest_even:
3525 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3526 ) {
3527 return
3528 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3529 }
3530 break;
3531 case float_round_down:
3532 return
3533 aSign ?
3534 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3535 : packFloatx80( 0, 0, 0 );
3536 case float_round_up:
3537 return
3538 aSign ? packFloatx80( 1, 0, 0 )
3539 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3540 }
3541 return packFloatx80( aSign, 0, 0 );
3542 }
3543 lastBitMask = 1;
3544 lastBitMask <<= 0x403E - aExp;
3545 roundBitsMask = lastBitMask - 1;
3546 z = a;
3547 roundingMode = float_rounding_mode();
3548 if ( roundingMode == float_round_nearest_even ) {
3549 z.low += lastBitMask>>1;
3550 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3551 }
3552 else if ( roundingMode != float_round_to_zero ) {
3553 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3554 z.low += roundBitsMask;
3555 }
3556 }
3557 z.low &= ~ roundBitsMask;
3558 if ( z.low == 0 ) {
3559 ++z.high;
3560 z.low = LIT64( 0x8000000000000000 );
3561 }
3562 if ( z.low != a.low ) float_set_inexact();
3563 return z;
3564
3565 }
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
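/*
 * Returns the result of adding the absolute values of the extended double-
 * precision floating-point values `a' and `b'.  If `zSign' is 1, the sum
 * is negated before being returned.  `zSign' is ignored if the result is a
 * NaN.
 */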
3576 static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3577 {
3578 int32 aExp, bExp, zExp;
3579 bits64 aSig, bSig, zSig0, zSig1;
3580 int32 expDiff;
3581
3582 aSig = extractFloatx80Frac( a );
3583 aExp = extractFloatx80Exp( a );
3584 bSig = extractFloatx80Frac( b );
3585 bExp = extractFloatx80Exp( b );
3586 expDiff = aExp - bExp;
3587 if ( 0 < expDiff ) {
3588 if ( aExp == 0x7FFF ) {
3589 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3590 return a;
3591 }
3592 if ( bExp == 0 ) --expDiff;
3593 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3594 zExp = aExp;
3595 }
3596 else if ( expDiff < 0 ) {
3597 if ( bExp == 0x7FFF ) {
3598 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3599 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3600 }
3601 if ( aExp == 0 ) ++expDiff;
3602 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3603 zExp = bExp;
3604 }
3605 else {
3606 if ( aExp == 0x7FFF ) {
3607 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3608 return propagateFloatx80NaN( a, b );
3609 }
3610 return a;
3611 }
3612 zSig1 = 0;
3613 zSig0 = aSig + bSig;
3614 if ( aExp == 0 ) {
3615 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3616 goto roundAndPack;
3617 }
3618 zExp = aExp;
3619 goto shiftRight1;
3620 }
3621 zSig0 = aSig + bSig;
3622 if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3623 shiftRight1:
3624 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3625 zSig0 |= LIT64( 0x8000000000000000 );
3626 ++zExp;
3627 roundAndPack:
3628 return
3629 roundAndPackFloatx80(
3630 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3631
3632 }
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
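/*
 * Returns the result of subtracting the absolute values of the extended
 * double-precision floating-point values `a' and `b'.  If `zSign' is 1,
 * the difference is negated before being returned.  `zSign' is ignored if
 * the result is a NaN.
 */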
3643 static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3644 {
3645 int32 aExp, bExp, zExp;
3646 bits64 aSig, bSig, zSig0, zSig1;
3647 int32 expDiff;
3648 floatx80 z;
3649
3650 aSig = extractFloatx80Frac( a );
3651 aExp = extractFloatx80Exp( a );
3652 bSig = extractFloatx80Frac( b );
3653 bExp = extractFloatx80Exp( b );
3654 expDiff = aExp - bExp;
3655 if ( 0 < expDiff ) goto aExpBigger;
3656 if ( expDiff < 0 ) goto bExpBigger;
3657 if ( aExp == 0x7FFF ) {
3658 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3659 return propagateFloatx80NaN( a, b );
3660 }
3661 float_raise( float_flag_invalid );
3662 z.low = floatx80_default_nan_low;
3663 z.high = floatx80_default_nan_high;
3664 return z;
3665 }
3666 if ( aExp == 0 ) {
3667 aExp = 1;
3668 bExp = 1;
3669 }
3670 zSig1 = 0;
3671 if ( bSig < aSig ) goto aBigger;
3672 if ( aSig < bSig ) goto bBigger;
3673 return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 );
3674 bExpBigger:
3675 if ( bExp == 0x7FFF ) {
3676 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3677 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3678 }
3679 if ( aExp == 0 ) ++expDiff;
3680 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3681 bBigger:
3682 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3683 zExp = bExp;
3684 zSign ^= 1;
3685 goto normalizeRoundAndPack;
3686 aExpBigger:
3687 if ( aExp == 0x7FFF ) {
3688 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3689 return a;
3690 }
3691 if ( bExp == 0 ) --expDiff;
3692 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3693 aBigger:
3694 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3695 zExp = aExp;
3696 normalizeRoundAndPack:
3697 return
3698 normalizeRoundAndPackFloatx80(
3699 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3700
3701 }
3702
3703
3704
3705
3706
3707
3708
3709
3710 floatx80 floatx80_add( floatx80 a, floatx80 b )
3711 {
3712 flag aSign, bSign;
3713
3714 aSign = extractFloatx80Sign( a );
3715 bSign = extractFloatx80Sign( b );
3716 if ( aSign == bSign ) {
3717 return addFloatx80Sigs( a, b, aSign );
3718 }
3719 else {
3720 return subFloatx80Sigs( a, b, aSign );
3721 }
3722
3723 }
3724
3725
3726
3727
3728
3729
3730
3731
3732 floatx80 floatx80_sub( floatx80 a, floatx80 b )
3733 {
3734 flag aSign, bSign;
3735
3736 aSign = extractFloatx80Sign( a );
3737 bSign = extractFloatx80Sign( b );
3738 if ( aSign == bSign ) {
3739 return subFloatx80Sigs( a, b, aSign );
3740 }
3741 else {
3742 return addFloatx80Sigs( a, b, aSign );
3743 }
3744
3745 }
3746
3747
3748
3749
3750
3751
3752
3753
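/*
 * Returns the result of multiplying the extended double-precision floating-
 * point values `a' and `b'.  The result is rounded to the precision
 * selected by `floatx80_rounding_precision'.
 */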
3754 floatx80 floatx80_mul( floatx80 a, floatx80 b )
3755 {
3756 flag aSign, bSign, zSign;
3757 int32 aExp, bExp, zExp;
3758 bits64 aSig, bSig, zSig0, zSig1;
3759 floatx80 z;
3760
3761 aSig = extractFloatx80Frac( a );
3762 aExp = extractFloatx80Exp( a );
3763 aSign = extractFloatx80Sign( a );
3764 bSig = extractFloatx80Frac( b );
3765 bExp = extractFloatx80Exp( b );
3766 bSign = extractFloatx80Sign( b );
3767 zSign = aSign ^ bSign;
3768 if ( aExp == 0x7FFF ) {
3769 if ( (bits64) ( aSig<<1 )
3770 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3771 return propagateFloatx80NaN( a, b );
3772 }
3773 if ( ( bExp | bSig ) == 0 ) goto invalid;
3774 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3775 }
3776 if ( bExp == 0x7FFF ) {
3777 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3778 if ( ( aExp | aSig ) == 0 ) {
3779 invalid:
3780 float_raise( float_flag_invalid );
3781 z.low = floatx80_default_nan_low;
3782 z.high = floatx80_default_nan_high;
3783 return z;
3784 }
3785 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3786 }
3787 if ( aExp == 0 ) {
3788 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3789 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3790 }
3791 if ( bExp == 0 ) {
3792 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3793 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3794 }
3795 zExp = aExp + bExp - 0x3FFE;
3796 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3797 if ( 0 < (sbits64) zSig0 ) {
3798 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3799 --zExp;
3800 }
3801 return
3802 roundAndPackFloatx80(
3803 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3804
3805 }
3806
3807
3808
3809
3810
3811
3812
3813
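/*
 * Returns the result of dividing the extended double-precision floating-
 * point value `a' by the corresponding value `b'.  Division of a finite
 * nonzero value by zero raises the divide-by-zero exception; 0/0 and
 * inf/inf raise the invalid exception and return the default NaN.
 */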
3814 floatx80 floatx80_div( floatx80 a, floatx80 b )
3815 {
3816 flag aSign, bSign, zSign;
3817 int32 aExp, bExp, zExp;
3818 bits64 aSig, bSig, zSig0, zSig1;
3819 bits64 rem0, rem1, rem2, term0, term1, term2;
3820 floatx80 z;
3821
3822 aSig = extractFloatx80Frac( a );
3823 aExp = extractFloatx80Exp( a );
3824 aSign = extractFloatx80Sign( a );
3825 bSig = extractFloatx80Frac( b );
3826 bExp = extractFloatx80Exp( b );
3827 bSign = extractFloatx80Sign( b );
3828 zSign = aSign ^ bSign;
3829 if ( aExp == 0x7FFF ) {
3830 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3831 if ( bExp == 0x7FFF ) {
3832 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3833 goto invalid;
3834 }
3835 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3836 }
3837 if ( bExp == 0x7FFF ) {
3838 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3839 return packFloatx80( zSign, 0, 0 );
3840 }
3841 if ( bExp == 0 ) {
3842 if ( bSig == 0 ) {
3843 if ( ( aExp | aSig ) == 0 ) {
3844 invalid:
3845 float_raise( float_flag_invalid );
3846 z.low = floatx80_default_nan_low;
3847 z.high = floatx80_default_nan_high;
3848 return z;
3849 }
3850 float_raise( float_flag_divbyzero );
3851 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3852 }
3853 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3854 }
3855 if ( aExp == 0 ) {
3856 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3857 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3858 }
3859 zExp = aExp - bExp + 0x3FFE;
3860 rem1 = 0;
3861 if ( bSig <= aSig ) {
3862 shift128Right( aSig, 0, 1, &aSig, &rem1 );
3863 ++zExp;
3864 }
3865 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3866 mul64To128( bSig, zSig0, &term0, &term1 );
3867 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3868 while ( (sbits64) rem0 < 0 ) {
3869 --zSig0;
3870 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3871 }
3872 zSig1 = estimateDiv128To64( rem1, 0, bSig );
3873 if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3874 mul64To128( bSig, zSig1, &term1, &term2 );
3875 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3876 while ( (sbits64) rem1 < 0 ) {
3877 --zSig1;
3878 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3879 }
3880 zSig1 |= ( ( rem1 | rem2 ) != 0 );
3881 }
3882 return
3883 roundAndPackFloatx80(
3884 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3885
3886 }
3887
3888
3889
3890
3891
3892
3893
3894
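/*
 * Returns the remainder of the extended double-precision floating-point
 * value `a' with respect to the corresponding value `b', in the sense of
 * the IEEE remainder operation.
 */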
3895 floatx80 floatx80_rem( floatx80 a, floatx80 b )
3896 {
3897 flag aSign, bSign, zSign;
3898 int32 aExp, bExp, expDiff;
3899 bits64 aSig0, aSig1, bSig;
3900 bits64 q, term0, term1, alternateASig0, alternateASig1;
3901 floatx80 z;
3902
3903 aSig0 = extractFloatx80Frac( a );
3904 aExp = extractFloatx80Exp( a );
3905 aSign = extractFloatx80Sign( a );
3906 bSig = extractFloatx80Frac( b );
3907 bExp = extractFloatx80Exp( b );
3908 bSign = extractFloatx80Sign( b );
3909 if ( aExp == 0x7FFF ) {
3910 if ( (bits64) ( aSig0<<1 )
3911 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3912 return propagateFloatx80NaN( a, b );
3913 }
3914 goto invalid;
3915 }
3916 if ( bExp == 0x7FFF ) {
3917 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3918 return a;
3919 }
3920 if ( bExp == 0 ) {
3921 if ( bSig == 0 ) {
3922 invalid:
3923 float_raise( float_flag_invalid );
3924 z.low = floatx80_default_nan_low;
3925 z.high = floatx80_default_nan_high;
3926 return z;
3927 }
3928 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3929 }
3930 if ( aExp == 0 ) {
3931 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3932 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3933 }
3934 bSig |= LIT64( 0x8000000000000000 );
3935 zSign = aSign;
3936 expDiff = aExp - bExp;
3937 aSig1 = 0;
3938 if ( expDiff < 0 ) {
3939 if ( expDiff < -1 ) return a;
3940 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3941 expDiff = 0;
3942 }
3943 q = ( bSig <= aSig0 );
3944 if ( q ) aSig0 -= bSig;
3945 expDiff -= 64;
3946 while ( 0 < expDiff ) {
3947 q = estimateDiv128To64( aSig0, aSig1, bSig );
3948 q = ( 2 < q ) ? q - 2 : 0;
3949 mul64To128( bSig, q, &term0, &term1 );
3950 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3951 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3952 expDiff -= 62;
3953 }
3954 expDiff += 64;
3955 if ( 0 < expDiff ) {
3956 q = estimateDiv128To64( aSig0, aSig1, bSig );
3957 q = ( 2 < q ) ? q - 2 : 0;
3958 q >>= 64 - expDiff;
3959 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3960 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3961 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3962 while ( le128( term0, term1, aSig0, aSig1 ) ) {
3963 ++q;
3964 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3965 }
3966 }
3967 else {
3968 term1 = 0;
3969 term0 = bSig;
3970 }
3971 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3972 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3973 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3974 && ( q & 1 ) )
3975 ) {
3976 aSig0 = alternateASig0;
3977 aSig1 = alternateASig1;
3978 zSign = ! zSign;
3979 }
3980 return
3981 normalizeRoundAndPackFloatx80(
3982 80, zSign, bExp + expDiff, aSig0, aSig1 );
3983
3984 }
3985
3986
3987
3988
3989
3990
3991
3992
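/*
 * Returns the square root of the extended double-precision floating-point
 * value `a'.  The invalid exception is raised, and the default NaN
 * returned, if `a' is negative and nonzero.
 */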
3993 floatx80 floatx80_sqrt( floatx80 a )
3994 {
3995 flag aSign;
3996 int32 aExp, zExp;
3997 bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3998 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3999 floatx80 z;
4000
4001 aSig0 = extractFloatx80Frac( a );
4002 aExp = extractFloatx80Exp( a );
4003 aSign = extractFloatx80Sign( a );
4004 if ( aExp == 0x7FFF ) {
4005 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4006 if ( ! aSign ) return a;
4007 goto invalid;
4008 }
4009 if ( aSign ) {
4010 if ( ( aExp | aSig0 ) == 0 ) return a;
4011 invalid:
4012 float_raise( float_flag_invalid );
4013 z.low = floatx80_default_nan_low;
4014 z.high = floatx80_default_nan_high;
4015 return z;
4016 }
4017 if ( aExp == 0 ) {
4018 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4019 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4020 }
4021 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4022 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4023 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4024 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4025 doubleZSig0 = zSig0<<1;
4026 mul64To128( zSig0, zSig0, &term0, &term1 );
4027 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4028 while ( (sbits64) rem0 < 0 ) {
4029 --zSig0;
4030 doubleZSig0 -= 2;
4031 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4032 }
4033 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4034 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4035 if ( zSig1 == 0 ) zSig1 = 1;
4036 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4037 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4038 mul64To128( zSig1, zSig1, &term2, &term3 );
4039 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4040 while ( (sbits64) rem1 < 0 ) {
4041 --zSig1;
4042 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4043 term3 |= 1;
4044 term2 |= doubleZSig0;
4045 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4046 }
4047 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4048 }
4049 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4050 zSig0 |= doubleZSig0;
4051 return
4052 roundAndPackFloatx80(
4053 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4054
4055 }
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065 flag floatx80_eq( floatx80 a, floatx80 b )
4066 {
4067
4068 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4069 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4070 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4071 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4072 ) {
4073 if ( floatx80_is_signaling_nan( a )
4074 || floatx80_is_signaling_nan( b ) ) {
4075 float_raise( float_flag_invalid );
4076 }
4077 return 0;
4078 }
4079 return
4080 ( a.low == b.low )
4081 && ( ( a.high == b.high )
4082 || ( ( a.low == 0 )
4083 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4084 );
4085
4086 }
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096 flag floatx80_le( floatx80 a, floatx80 b )
4097 {
4098 flag aSign, bSign;
4099
4100 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4101 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4102 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4103 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4104 ) {
4105 float_raise( float_flag_invalid );
4106 return 0;
4107 }
4108 aSign = extractFloatx80Sign( a );
4109 bSign = extractFloatx80Sign( b );
4110 if ( aSign != bSign ) {
4111 return
4112 aSign
4113 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4114 == 0 );
4115 }
4116 return
4117 aSign ? le128( b.high, b.low, a.high, a.low )
4118 : le128( a.high, a.low, b.high, b.low );
4119
4120 }
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130 flag floatx80_lt( floatx80 a, floatx80 b )
4131 {
4132 flag aSign, bSign;
4133
4134 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4135 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4136 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4137 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4138 ) {
4139 float_raise( float_flag_invalid );
4140 return 0;
4141 }
4142 aSign = extractFloatx80Sign( a );
4143 bSign = extractFloatx80Sign( b );
4144 if ( aSign != bSign ) {
4145 return
4146 aSign
4147 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4148 != 0 );
4149 }
4150 return
4151 aSign ? lt128( b.high, b.low, a.high, a.low )
4152 : lt128( a.high, a.low, b.high, b.low );
4153
4154 }
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164 flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4165 {
4166
4167 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4168 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4169 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4170 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4171 ) {
4172 float_raise( float_flag_invalid );
4173 return 0;
4174 }
4175 return
4176 ( a.low == b.low )
4177 && ( ( a.high == b.high )
4178 || ( ( a.low == 0 )
4179 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4180 );
4181
4182 }
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192 flag floatx80_le_quiet( floatx80 a, floatx80 b )
4193 {
4194 flag aSign, bSign;
4195
4196 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4197 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4198 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4199 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4200 ) {
4201 if ( floatx80_is_signaling_nan( a )
4202 || floatx80_is_signaling_nan( b ) ) {
4203 float_raise( float_flag_invalid );
4204 }
4205 return 0;
4206 }
4207 aSign = extractFloatx80Sign( a );
4208 bSign = extractFloatx80Sign( b );
4209 if ( aSign != bSign ) {
4210 return
4211 aSign
4212 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4213 == 0 );
4214 }
4215 return
4216 aSign ? le128( b.high, b.low, a.high, a.low )
4217 : le128( a.high, a.low, b.high, b.low );
4218
4219 }
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229 flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4230 {
4231 flag aSign, bSign;
4232
4233 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4234 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4235 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4236 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4237 ) {
4238 if ( floatx80_is_signaling_nan( a )
4239 || floatx80_is_signaling_nan( b ) ) {
4240 float_raise( float_flag_invalid );
4241 }
4242 return 0;
4243 }
4244 aSign = extractFloatx80Sign( a );
4245 bSign = extractFloatx80Sign( b );
4246 if ( aSign != bSign ) {
4247 return
4248 aSign
4249 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4250 != 0 );
4251 }
4252 return
4253 aSign ? lt128( b.high, b.low, a.high, a.low )
4254 : lt128( a.high, a.low, b.high, b.low );
4255
4256 }
4257
4258 #endif
4259
4260 #ifdef FLOAT128
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
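/*
 * Returns the result of converting the quadruple-precision floating-point
 * value `a' to the 32-bit two's complement integer format, rounding
 * according to the current rounding mode.
 */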
4273 int32 float128_to_int32( float128 a )
4274 {
4275 flag aSign;
4276 int32 aExp, shiftCount;
4277 bits64 aSig0, aSig1;
4278
4279 aSig1 = extractFloat128Frac1( a );
4280 aSig0 = extractFloat128Frac0( a );
4281 aExp = extractFloat128Exp( a );
4282 aSign = extractFloat128Sign( a );
4283 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4284 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4285 aSig0 |= ( aSig1 != 0 );
4286 shiftCount = 0x4028 - aExp;
4287 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4288 return roundAndPackInt32( aSign, aSig0 );
4289
4290 }
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303 int32 float128_to_int32_round_to_zero( float128 a )
4304 {
4305 flag aSign;
4306 int32 aExp, shiftCount;
4307 bits64 aSig0, aSig1, savedASig;
4308 int32 z;
4309
4310 aSig1 = extractFloat128Frac1( a );
4311 aSig0 = extractFloat128Frac0( a );
4312 aExp = extractFloat128Exp( a );
4313 aSign = extractFloat128Sign( a );
4314 aSig0 |= ( aSig1 != 0 );
4315 if ( 0x401E < aExp ) {
4316 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4317 goto invalid;
4318 }
4319 else if ( aExp < 0x3FFF ) {
4320 if ( aExp || aSig0 ) float_set_inexact();
4321 return 0;
4322 }
4323 aSig0 |= LIT64( 0x0001000000000000 );
4324 shiftCount = 0x402F - aExp;
4325 savedASig = aSig0;
4326 aSig0 >>= shiftCount;
4327 z = aSig0;
4328 if ( aSign ) z = - z;
4329 if ( ( z < 0 ) ^ aSign ) {
4330 invalid:
4331 float_raise( float_flag_invalid );
4332 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4333 }
4334 if ( ( aSig0<<shiftCount ) != savedASig ) {
4335 float_set_inexact();
4336 }
4337 return z;
4338
4339 }
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352 int64 float128_to_int64( float128 a )
4353 {
4354 flag aSign;
4355 int32 aExp, shiftCount;
4356 bits64 aSig0, aSig1;
4357
4358 aSig1 = extractFloat128Frac1( a );
4359 aSig0 = extractFloat128Frac0( a );
4360 aExp = extractFloat128Exp( a );
4361 aSign = extractFloat128Sign( a );
4362 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4363 shiftCount = 0x402F - aExp;
4364 if ( shiftCount <= 0 ) {
4365 if ( 0x403E < aExp ) {
4366 float_raise( float_flag_invalid );
4367 if ( ! aSign
4368 || ( ( aExp == 0x7FFF )
4369 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4370 )
4371 ) {
4372 return LIT64( 0x7FFFFFFFFFFFFFFF );
4373 }
4374 return (sbits64) LIT64( 0x8000000000000000 );
4375 }
4376 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4377 }
4378 else {
4379 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4380 }
4381 return roundAndPackInt64( aSign, aSig0, aSig1 );
4382
4383 }
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396 int64 float128_to_int64_round_to_zero( float128 a )
4397 {
4398 flag aSign;
4399 int32 aExp, shiftCount;
4400 bits64 aSig0, aSig1;
4401 int64 z;
4402
4403 aSig1 = extractFloat128Frac1( a );
4404 aSig0 = extractFloat128Frac0( a );
4405 aExp = extractFloat128Exp( a );
4406 aSign = extractFloat128Sign( a );
4407 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4408 shiftCount = aExp - 0x402F;
4409 if ( 0 < shiftCount ) {
4410 if ( 0x403E <= aExp ) {
4411 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4412 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4413 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4414 if ( aSig1 ) float_set_inexact();
4415 }
4416 else {
4417 float_raise( float_flag_invalid );
4418 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4419 return LIT64( 0x7FFFFFFFFFFFFFFF );
4420 }
4421 }
4422 return (sbits64) LIT64( 0x8000000000000000 );
4423 }
4424 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4425 if ( (bits64) ( aSig1<<shiftCount ) ) {
4426 float_set_inexact();
4427 }
4428 }
4429 else {
4430 if ( aExp < 0x3FFF ) {
4431 if ( aExp | aSig0 | aSig1 ) {
4432 float_set_inexact();
4433 }
4434 return 0;
4435 }
4436 z = aSig0>>( - shiftCount );
4437 if ( aSig1
4438 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4439 float_set_inexact();
4440 }
4441 }
4442 if ( aSign ) z = - z;
4443 return z;
4444
4445 }
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455 float32 float128_to_float32( float128 a )
4456 {
4457 flag aSign;
4458 int32 aExp;
4459 bits64 aSig0, aSig1;
4460 bits32 zSig;
4461
4462 aSig1 = extractFloat128Frac1( a );
4463 aSig0 = extractFloat128Frac0( a );
4464 aExp = extractFloat128Exp( a );
4465 aSign = extractFloat128Sign( a );
4466 if ( aExp == 0x7FFF ) {
4467 if ( aSig0 | aSig1 ) {
4468 return commonNaNToFloat32( float128ToCommonNaN( a ) );
4469 }
4470 return packFloat32( aSign, 0xFF, 0 );
4471 }
4472 aSig0 |= ( aSig1 != 0 );
4473 shift64RightJamming( aSig0, 18, &aSig0 );
4474 zSig = aSig0;
4475 if ( aExp || zSig ) {
4476 zSig |= 0x40000000;
4477 aExp -= 0x3F81;
4478 }
4479 return roundAndPackFloat32( aSign, aExp, zSig );
4480
4481 }
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491 float64 float128_to_float64( float128 a )
4492 {
4493 flag aSign;
4494 int32 aExp;
4495 bits64 aSig0, aSig1;
4496
4497 aSig1 = extractFloat128Frac1( a );
4498 aSig0 = extractFloat128Frac0( a );
4499 aExp = extractFloat128Exp( a );
4500 aSign = extractFloat128Sign( a );
4501 if ( aExp == 0x7FFF ) {
4502 if ( aSig0 | aSig1 ) {
4503 return commonNaNToFloat64( float128ToCommonNaN( a ) );
4504 }
4505 return packFloat64( aSign, 0x7FF, 0 );
4506 }
4507 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4508 aSig0 |= ( aSig1 != 0 );
4509 if ( aExp || aSig0 ) {
4510 aSig0 |= LIT64( 0x4000000000000000 );
4511 aExp -= 0x3C01;
4512 }
4513 return roundAndPackFloat64( aSign, aExp, aSig0 );
4514
4515 }
4516
4517 #ifdef FLOATX80
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527 floatx80 float128_to_floatx80( float128 a )
4528 {
4529 flag aSign;
4530 int32 aExp;
4531 bits64 aSig0, aSig1;
4532
4533 aSig1 = extractFloat128Frac1( a );
4534 aSig0 = extractFloat128Frac0( a );
4535 aExp = extractFloat128Exp( a );
4536 aSign = extractFloat128Sign( a );
4537 if ( aExp == 0x7FFF ) {
4538 if ( aSig0 | aSig1 ) {
4539 return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4540 }
4541 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4542 }
4543 if ( aExp == 0 ) {
4544 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4545 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4546 }
4547 else {
4548 aSig0 |= LIT64( 0x0001000000000000 );
4549 }
4550 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4551 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4552
4553 }
4554
4555 #endif
4556
4557
4558
4559
4560
4561
4562
4563
4564
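/*
 * Rounds the quadruple-precision floating-point value `a' to an integer,
 * and returns the result as a quadruple-precision floating-point value.
 * The rounding is performed according to the current rounding mode.
 */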
4565 float128 float128_round_to_int( float128 a )
4566 {
4567 flag aSign;
4568 int32 aExp;
4569 bits64 lastBitMask, roundBitsMask;
4570 int8 roundingMode;
4571 float128 z;
4572
4573 aExp = extractFloat128Exp( a );
4574 if ( 0x402F <= aExp ) {
4575 if ( 0x406F <= aExp ) {
4576 if ( ( aExp == 0x7FFF )
4577 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4578 ) {
4579 return propagateFloat128NaN( a, a );
4580 }
4581 return a;
4582 }
4583 lastBitMask = 1;
4584 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4585 roundBitsMask = lastBitMask - 1;
4586 z = a;
4587 roundingMode = float_rounding_mode();
4588 if ( roundingMode == float_round_nearest_even ) {
4589 if ( lastBitMask ) {
4590 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4591 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4592 }
4593 else {
4594 if ( (sbits64) z.low < 0 ) {
4595 ++z.high;
4596 if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4597 }
4598 }
4599 }
4600 else if ( roundingMode != float_round_to_zero ) {
4601 if ( extractFloat128Sign( z )
4602 ^ ( roundingMode == float_round_up ) ) {
4603 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4604 }
4605 }
4606 z.low &= ~ roundBitsMask;
4607 }
4608 else {
4609 if ( aExp < 0x3FFF ) {
4610 if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4611 float_set_inexact();
4612 aSign = extractFloat128Sign( a );
4613 switch ( float_rounding_mode() ) {
4614 case float_round_nearest_even:
4615 if ( ( aExp == 0x3FFE )
4616 && ( extractFloat128Frac0( a )
4617 | extractFloat128Frac1( a ) )
4618 ) {
4619 return packFloat128( aSign, 0x3FFF, 0, 0 );
4620 }
4621 break;
4622 case float_round_down:
4623 return
4624 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4625 : packFloat128( 0, 0, 0, 0 );
4626 case float_round_up:
4627 return
4628 aSign ? packFloat128( 1, 0, 0, 0 )
4629 : packFloat128( 0, 0x3FFF, 0, 0 );
4630 }
4631 return packFloat128( aSign, 0, 0, 0 );
4632 }
4633 lastBitMask = 1;
4634 lastBitMask <<= 0x402F - aExp;
4635 roundBitsMask = lastBitMask - 1;
4636 z.low = 0;
4637 z.high = a.high;
4638 roundingMode = float_rounding_mode();
4639 if ( roundingMode == float_round_nearest_even ) {
4640 z.high += lastBitMask>>1;
4641 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4642 z.high &= ~ lastBitMask;
4643 }
4644 }
4645 else if ( roundingMode != float_round_to_zero ) {
4646 if ( extractFloat128Sign( z )
4647 ^ ( roundingMode == float_round_up ) ) {
4648 z.high |= ( a.low != 0 );
4649 z.high += roundBitsMask;
4650 }
4651 }
4652 z.high &= ~ roundBitsMask;
4653 }
4654 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4655 float_set_inexact();
4656 }
4657 return z;
4658
4659 }
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
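/*
 * Returns the result of adding the absolute values of the quadruple-
 * precision floating-point values `a' and `b'.  If `zSign' is 1, the sum
 * is negated before being returned.  `zSign' is ignored if the result is a
 * NaN.
 */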
4670 static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4671 {
4672 int32 aExp, bExp, zExp;
4673 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4674 int32 expDiff;
4675
4676 aSig1 = extractFloat128Frac1( a );
4677 aSig0 = extractFloat128Frac0( a );
4678 aExp = extractFloat128Exp( a );
4679 bSig1 = extractFloat128Frac1( b );
4680 bSig0 = extractFloat128Frac0( b );
4681 bExp = extractFloat128Exp( b );
4682 expDiff = aExp - bExp;
4683 if ( 0 < expDiff ) {
4684 if ( aExp == 0x7FFF ) {
4685 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4686 return a;
4687 }
4688 if ( bExp == 0 ) {
4689 --expDiff;
4690 }
4691 else {
4692 bSig0 |= LIT64( 0x0001000000000000 );
4693 }
4694 shift128ExtraRightJamming(
4695 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4696 zExp = aExp;
4697 }
4698 else if ( expDiff < 0 ) {
4699 if ( bExp == 0x7FFF ) {
4700 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4701 return packFloat128( zSign, 0x7FFF, 0, 0 );
4702 }
4703 if ( aExp == 0 ) {
4704 ++expDiff;
4705 }
4706 else {
4707 aSig0 |= LIT64( 0x0001000000000000 );
4708 }
4709 shift128ExtraRightJamming(
4710 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4711 zExp = bExp;
4712 }
4713 else {
4714 if ( aExp == 0x7FFF ) {
4715 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4716 return propagateFloat128NaN( a, b );
4717 }
4718 return a;
4719 }
4720 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4721 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4722 zSig2 = 0;
4723 zSig0 |= LIT64( 0x0002000000000000 );
4724 zExp = aExp;
4725 goto shiftRight1;
4726 }
4727 aSig0 |= LIT64( 0x0001000000000000 );
4728 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4729 --zExp;
4730 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4731 ++zExp;
4732 shiftRight1:
4733 shift128ExtraRightJamming(
4734 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4735 roundAndPack:
4736 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4737
4738 }
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749 static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4750 {
4751 int32 aExp, bExp, zExp;
4752 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4753 int32 expDiff;
4754 float128 z;
4755
4756 aSig1 = extractFloat128Frac1( a );
4757 aSig0 = extractFloat128Frac0( a );
4758 aExp = extractFloat128Exp( a );
4759 bSig1 = extractFloat128Frac1( b );
4760 bSig0 = extractFloat128Frac0( b );
4761 bExp = extractFloat128Exp( b );
4762 expDiff = aExp - bExp;
4763 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4764 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4765 if ( 0 < expDiff ) goto aExpBigger;
4766 if ( expDiff < 0 ) goto bExpBigger;
4767 if ( aExp == 0x7FFF ) {
4768 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4769 return propagateFloat128NaN( a, b );
4770 }
4771 float_raise( float_flag_invalid );
4772 z.low = float128_default_nan_low;
4773 z.high = float128_default_nan_high;
4774 return z;
4775 }
4776 if ( aExp == 0 ) {
4777 aExp = 1;
4778 bExp = 1;
4779 }
4780 if ( bSig0 < aSig0 ) goto aBigger;
4781 if ( aSig0 < bSig0 ) goto bBigger;
4782 if ( bSig1 < aSig1 ) goto aBigger;
4783 if ( aSig1 < bSig1 ) goto bBigger;
4784 return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 );
4785 bExpBigger:
4786 if ( bExp == 0x7FFF ) {
4787 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4788 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4789 }
4790 if ( aExp == 0 ) {
4791 ++expDiff;
4792 }
4793 else {
4794 aSig0 |= LIT64( 0x4000000000000000 );
4795 }
4796 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4797 bSig0 |= LIT64( 0x4000000000000000 );
4798 bBigger:
4799 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4800 zExp = bExp;
4801 zSign ^= 1;
4802 goto normalizeRoundAndPack;
4803 aExpBigger:
4804 if ( aExp == 0x7FFF ) {
4805 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4806 return a;
4807 }
4808 if ( bExp == 0 ) {
4809 --expDiff;
4810 }
4811 else {
4812 bSig0 |= LIT64( 0x4000000000000000 );
4813 }
4814 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4815 aSig0 |= LIT64( 0x4000000000000000 );
4816 aBigger:
4817 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4818 zExp = aExp;
4819 normalizeRoundAndPack:
4820 --zExp;
4821 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4822
4823 }
4824
4825
4826
4827
4828
4829
4830
4831
4832 float128 float128_add( float128 a, float128 b )
4833 {
4834 flag aSign, bSign;
4835
4836 aSign = extractFloat128Sign( a );
4837 bSign = extractFloat128Sign( b );
4838 if ( aSign == bSign ) {
4839 return addFloat128Sigs( a, b, aSign );
4840 }
4841 else {
4842 return subFloat128Sigs( a, b, aSign );
4843 }
4844
4845 }
4846
4847
4848
4849
4850
4851
4852
4853
4854 float128 float128_sub( float128 a, float128 b )
4855 {
4856 flag aSign, bSign;
4857
4858 aSign = extractFloat128Sign( a );
4859 bSign = extractFloat128Sign( b );
4860 if ( aSign == bSign ) {
4861 return subFloat128Sigs( a, b, aSign );
4862 }
4863 else {
4864 return addFloat128Sigs( a, b, aSign );
4865 }
4866
4867 }
4868
4869
4870
4871
4872
4873
4874
4875
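/*
 * Returns the result of multiplying the quadruple-precision floating-point
 * values `a' and `b', rounded according to the current rounding mode.
 */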
4876 float128 float128_mul( float128 a, float128 b )
4877 {
4878 flag aSign, bSign, zSign;
4879 int32 aExp, bExp, zExp;
4880 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4881 float128 z;
4882
4883 aSig1 = extractFloat128Frac1( a );
4884 aSig0 = extractFloat128Frac0( a );
4885 aExp = extractFloat128Exp( a );
4886 aSign = extractFloat128Sign( a );
4887 bSig1 = extractFloat128Frac1( b );
4888 bSig0 = extractFloat128Frac0( b );
4889 bExp = extractFloat128Exp( b );
4890 bSign = extractFloat128Sign( b );
4891 zSign = aSign ^ bSign;
4892 if ( aExp == 0x7FFF ) {
4893 if ( ( aSig0 | aSig1 )
4894 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4895 return propagateFloat128NaN( a, b );
4896 }
4897 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4898 return packFloat128( zSign, 0x7FFF, 0, 0 );
4899 }
4900 if ( bExp == 0x7FFF ) {
4901 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4902 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4903 invalid:
4904 float_raise( float_flag_invalid );
4905 z.low = float128_default_nan_low;
4906 z.high = float128_default_nan_high;
4907 return z;
4908 }
4909 return packFloat128( zSign, 0x7FFF, 0, 0 );
4910 }
4911 if ( aExp == 0 ) {
4912 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4913 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4914 }
4915 if ( bExp == 0 ) {
4916 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4917 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4918 }
4919 zExp = aExp + bExp - 0x4000;
4920 aSig0 |= LIT64( 0x0001000000000000 );
4921 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4922 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4923 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4924 zSig2 |= ( zSig3 != 0 );
4925 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4926 shift128ExtraRightJamming(
4927 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4928 ++zExp;
4929 }
4930 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4931
4932 }
4933
4934
4935
4936
4937
4938
4939
4940
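/*
 * Returns the result of dividing the quadruple-precision floating-point
 * value `a' by the corresponding value `b'.  Division of a finite nonzero
 * value by zero raises the divide-by-zero exception; 0/0 and inf/inf raise
 * the invalid exception and return the default NaN.
 */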
4941 float128 float128_div( float128 a, float128 b )
4942 {
4943 flag aSign, bSign, zSign;
4944 int32 aExp, bExp, zExp;
4945 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4946 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4947 float128 z;
4948
4949 aSig1 = extractFloat128Frac1( a );
4950 aSig0 = extractFloat128Frac0( a );
4951 aExp = extractFloat128Exp( a );
4952 aSign = extractFloat128Sign( a );
4953 bSig1 = extractFloat128Frac1( b );
4954 bSig0 = extractFloat128Frac0( b );
4955 bExp = extractFloat128Exp( b );
4956 bSign = extractFloat128Sign( b );
4957 zSign = aSign ^ bSign;
4958 if ( aExp == 0x7FFF ) {
4959 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4960 if ( bExp == 0x7FFF ) {
4961 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4962 goto invalid;
4963 }
4964 return packFloat128( zSign, 0x7FFF, 0, 0 );
4965 }
4966 if ( bExp == 0x7FFF ) {
4967 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4968 return packFloat128( zSign, 0, 0, 0 );
4969 }
4970 if ( bExp == 0 ) {
4971 if ( ( bSig0 | bSig1 ) == 0 ) {
4972 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4973 invalid:
4974 float_raise( float_flag_invalid );
4975 z.low = float128_default_nan_low;
4976 z.high = float128_default_nan_high;
4977 return z;
4978 }
4979 float_raise( float_flag_divbyzero );
4980 return packFloat128( zSign, 0x7FFF, 0, 0 );
4981 }
4982 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4983 }
4984 if ( aExp == 0 ) {
4985 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4986 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4987 }
4988 zExp = aExp - bExp + 0x3FFD;
4989 shortShift128Left(
4990 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4991 shortShift128Left(
4992 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4993 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4994 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4995 ++zExp;
4996 }
4997 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4998 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4999 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5000 while ( (sbits64) rem0 < 0 ) {
5001 --zSig0;
5002 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5003 }
5004 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5005 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5006 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5007 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5008 while ( (sbits64) rem1 < 0 ) {
5009 --zSig1;
5010 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5011 }
5012 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5013 }
5014 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5015 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5016
5017 }
5018
5019
5020
5021
5022
5023
5024
5025
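/*
Returns the remainder of the quadruple-precision floating-point value `a'
with respect to the corresponding value `b'.  The operation is performed
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic:
the result is a - n*b, where n is the integer nearest the exact quotient
a/b.  If `b' is infinite the result is `a' unchanged; if `a' is infinite or
`b' is zero, the invalid flag is raised and the default NaN is returned.
Note that this is the IEEE remainder (round-to-nearest quotient), not the
truncated fmod()-style remainder.
*/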
5026 float128 float128_rem( float128 a, float128 b )
5027 {
5028 flag aSign, bSign, zSign;
5029 int32 aExp, bExp, expDiff;
5030 bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5031 bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5032 sbits64 sigMean0;
5033 float128 z;
5034
5035 aSig1 = extractFloat128Frac1( a );
5036 aSig0 = extractFloat128Frac0( a );
5037 aExp = extractFloat128Exp( a );
5038 aSign = extractFloat128Sign( a );
5039 bSig1 = extractFloat128Frac1( b );
5040 bSig0 = extractFloat128Frac0( b );
5041 bExp = extractFloat128Exp( b );
5042 bSign = extractFloat128Sign( b );
5043 if ( aExp == 0x7FFF ) {
5044 if ( ( aSig0 | aSig1 )
5045 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5046 return propagateFloat128NaN( a, b );
5047 }
5048 goto invalid;
5049 }
5050 if ( bExp == 0x7FFF ) {
5051 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5052 return a;
5053 }
5054 if ( bExp == 0 ) {
5055 if ( ( bSig0 | bSig1 ) == 0 ) {
5056 invalid:
5057 float_raise( float_flag_invalid );
5058 z.low = float128_default_nan_low;
5059 z.high = float128_default_nan_high;
5060 return z;
5061 }
5062 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5063 }
5064 if ( aExp == 0 ) {
5065 if ( ( aSig0 | aSig1 ) == 0 ) return a;
5066 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5067 }
5068 expDiff = aExp - bExp;
5069 if ( expDiff < -1 ) return a;
5070 shortShift128Left(
5071 aSig0 | LIT64( 0x0001000000000000 ),
5072 aSig1,
5073 15 - ( expDiff < 0 ),
5074 &aSig0,
5075 &aSig1
5076 );
5077 shortShift128Left(
5078 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5079 q = le128( bSig0, bSig1, aSig0, aSig1 );
5080 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5081 expDiff -= 64;
5082 while ( 0 < expDiff ) {
5083 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5084 q = ( 4 < q ) ? q - 4 : 0;
5085 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5086 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5087 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5088 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5089 expDiff -= 61;
5090 }
5091 if ( -64 < expDiff ) {
5092 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5093 q = ( 4 < q ) ? q - 4 : 0;
5094 q >>= - expDiff;
5095 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5096 expDiff += 52;
5097 if ( expDiff < 0 ) {
5098 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5099 }
5100 else {
5101 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5102 }
5103 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5104 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5105 }
5106 else {
5107 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5108 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5109 }
5110 do {
5111 alternateASig0 = aSig0;
5112 alternateASig1 = aSig1;
5113 ++q;
5114 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5115 } while ( 0 <= (sbits64) aSig0 );
5116 add128(
5117 aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *) &sigMean0, &sigMean1 );
5118 if ( ( sigMean0 < 0 )
5119 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5120 aSig0 = alternateASig0;
5121 aSig1 = alternateASig1;
5122 }
5123 zSign = ( (sbits64) aSig0 < 0 );
5124 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5125 return
5126 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5127
5128 }
5129
5130
5131
5132
5133
5134
5135
5136
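/*
Returns the square root of the quadruple-precision floating-point value `a'.
The operation is performed according to the IEC/IEEE Standard for Binary
Floating-Point Arithmetic.  The square root of a negative non-zero number
raises the invalid flag and returns the default NaN; the square roots of -0
and +infinity are -0 and +infinity respectively.  The implementation forms a
32-bit estimate with estimateSqrt32() and refines it through two
estimateDiv128To64() steps before rounding.
*/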
5137 float128 float128_sqrt( float128 a )
5138 {
5139 flag aSign;
5140 int32 aExp, zExp;
5141 bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5142 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5143 float128 z;
5144
5145 aSig1 = extractFloat128Frac1( a );
5146 aSig0 = extractFloat128Frac0( a );
5147 aExp = extractFloat128Exp( a );
5148 aSign = extractFloat128Sign( a );
5149 if ( aExp == 0x7FFF ) {
5150 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5151 if ( ! aSign ) return a;
5152 goto invalid;
5153 }
5154 if ( aSign ) {
5155 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5156 invalid:
5157 float_raise( float_flag_invalid );
5158 z.low = float128_default_nan_low;
5159 z.high = float128_default_nan_high;
5160 return z;
5161 }
5162 if ( aExp == 0 ) {
5163 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5164 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5165 }
5166 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5167 aSig0 |= LIT64( 0x0001000000000000 );
5168 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5169 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5170 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5171 doubleZSig0 = zSig0<<1;
5172 mul64To128( zSig0, zSig0, &term0, &term1 );
5173 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5174 while ( (sbits64) rem0 < 0 ) {
5175 --zSig0;
5176 doubleZSig0 -= 2;
5177 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5178 }
5179 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5180 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5181 if ( zSig1 == 0 ) zSig1 = 1;
5182 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5183 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5184 mul64To128( zSig1, zSig1, &term2, &term3 );
5185 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5186 while ( (sbits64) rem1 < 0 ) {
5187 --zSig1;
5188 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5189 term3 |= 1;
5190 term2 |= doubleZSig0;
5191 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5192 }
5193 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5194 }
5195 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5196 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5197
5198 }
5199
5200
5201
5202
5203
5204
5205
5206
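/*
Returns 1 if the quadruple-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  NaN operands always compare
unequal, and the invalid exception is raised only when one of them is a
signaling NaN (a "quiet" comparison).  Zeros of opposite sign compare equal.

Sketch of a call (the operands x and y are assumptions, not defined here):

    if ( float128_eq( x, y ) ) {
        ... x equals y; this includes the case x == +0, y == -0 ...
    }
*/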
5207 flag float128_eq( float128 a, float128 b )
5208 {
5209
5210 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5211 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5212 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5213 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5214 ) {
5215 if ( float128_is_signaling_nan( a )
5216 || float128_is_signaling_nan( b ) ) {
5217 float_raise( float_flag_invalid );
5218 }
5219 return 0;
5220 }
5221 return
5222 ( a.low == b.low )
5223 && ( ( a.high == b.high )
5224 || ( ( a.low == 0 )
5225 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5226 );
5227
5228 }
5229
5230
5231
5232
5233
5234
5235
5236
5237
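/*
Returns 1 if the quadruple-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.  The invalid
exception is raised if either operand is a NaN, quiet or signaling, and the
result is then 0.
*/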
5238 flag float128_le( float128 a, float128 b )
5239 {
5240 flag aSign, bSign;
5241
5242 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5243 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5244 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5245 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5246 ) {
5247 float_raise( float_flag_invalid );
5248 return 0;
5249 }
5250 aSign = extractFloat128Sign( a );
5251 bSign = extractFloat128Sign( b );
5252 if ( aSign != bSign ) {
5253 return
5254 aSign
5255 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5256 == 0 );
5257 }
5258 return
5259 aSign ? le128( b.high, b.low, a.high, a.low )
5260 : le128( a.high, a.low, b.high, b.low );
5261
5262 }
5263
5264
5265
5266
5267
5268
5269
5270
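/*
Returns 1 if the quadruple-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  The invalid exception is
raised if either operand is a NaN, quiet or signaling, and the result is
then 0.
*/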
5271 flag float128_lt( float128 a, float128 b )
5272 {
5273 flag aSign, bSign;
5274
5275 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5276 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5277 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5278 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5279 ) {
5280 float_raise( float_flag_invalid );
5281 return 0;
5282 }
5283 aSign = extractFloat128Sign( a );
5284 bSign = extractFloat128Sign( b );
5285 if ( aSign != bSign ) {
5286 return
5287 aSign
5288 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5289 != 0 );
5290 }
5291 return
5292 aSign ? lt128( b.high, b.low, a.high, a.low )
5293 : lt128( a.high, a.low, b.high, b.low );
5294
5295 }
5296
5297
5298
5299
5300
5301
5302
5303
5304
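/*
Returns 1 if the quadruple-precision floating-point value `a' is equal to
the corresponding value `b', and 0 otherwise.  Unlike float128_eq(), the
invalid exception is raised for any NaN operand, whether quiet or signaling.
*/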
5305 flag float128_eq_signaling( float128 a, float128 b )
5306 {
5307
5308 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5309 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5310 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5311 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5312 ) {
5313 float_raise( float_flag_invalid );
5314 return 0;
5315 }
5316 return
5317 ( a.low == b.low )
5318 && ( ( a.high == b.high )
5319 || ( ( a.low == 0 )
5320 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5321 );
5322
5323 }
5324
5325
5326
5327
5328
5329
5330
5331
5332
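/*
Returns 1 if the quadruple-precision floating-point value `a' is less than
or equal to the corresponding value `b', and 0 otherwise.  Unlike
float128_le(), quiet NaNs do not raise the invalid exception; only signaling
NaNs do.  Any NaN operand makes the result 0.
*/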
5333 flag float128_le_quiet( float128 a, float128 b )
5334 {
5335 flag aSign, bSign;
5336
5337 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5338 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5339 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5340 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5341 ) {
5342 if ( float128_is_signaling_nan( a )
5343 || float128_is_signaling_nan( b ) ) {
5344 float_raise( float_flag_invalid );
5345 }
5346 return 0;
5347 }
5348 aSign = extractFloat128Sign( a );
5349 bSign = extractFloat128Sign( b );
5350 if ( aSign != bSign ) {
5351 return
5352 aSign
5353 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5354 == 0 );
5355 }
5356 return
5357 aSign ? le128( b.high, b.low, a.high, a.low )
5358 : le128( a.high, a.low, b.high, b.low );
5359
5360 }
5361
5362
5363
5364
5365
5366
5367
5368
5369
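/*
Returns 1 if the quadruple-precision floating-point value `a' is less than
the corresponding value `b', and 0 otherwise.  Unlike float128_lt(), quiet
NaNs do not raise the invalid exception; only signaling NaNs do.  Any NaN
operand makes the result 0.
*/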
5370 flag float128_lt_quiet( float128 a, float128 b )
5371 {
5372 flag aSign, bSign;
5373
5374 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5375 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5376 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5377 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5378 ) {
5379 if ( float128_is_signaling_nan( a )
5380 || float128_is_signaling_nan( b ) ) {
5381 float_raise( float_flag_invalid );
5382 }
5383 return 0;
5384 }
5385 aSign = extractFloat128Sign( a );
5386 bSign = extractFloat128Sign( b );
5387 if ( aSign != bSign ) {
5388 return
5389 aSign
5390 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5391 != 0 );
5392 }
5393 return
5394 aSign ? lt128( b.high, b.low, a.high, a.low )
5395 : lt128( a.high, a.low, b.high, b.low );
5396
5397 }
5398
5399 #endif
5400
5401
5402 #if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
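/*
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit unsigned integer format, always rounding toward zero.
An input with the sign bit set raises the invalid exception and returns 0;
an input too large for the format (including +infinity and NaNs with the
sign bit clear) raises invalid and returns 0xFFFFFFFF; any discarded
fraction bits raise the inexact exception.

Sketch of the intended behaviour (the literal operands are assumptions,
shown as values rather than as raw float64 bit patterns):

    float64_to_uint32_round_to_zero( 3.75 )  ->  3, inexact raised
    float64_to_uint32_round_to_zero( -1.0 )  ->  0, invalid raised
*/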
5425 uint32 float64_to_uint32_round_to_zero( float64 a )
5426 {
5427 flag aSign;
5428 int16 aExp, shiftCount;
5429 bits64 aSig, savedASig;
5430 uint32 z;
5431
5432 aSig = extractFloat64Frac( a );
5433 aExp = extractFloat64Exp( a );
5434 aSign = extractFloat64Sign( a );
5435
5436 if (aSign) {
5437 float_raise( float_flag_invalid );
5438 return 0;
5439 }
5440
5441 if ( 0x41E < aExp ) {
5442 float_raise( float_flag_invalid );
5443 return 0xFFFFFFFF;
5444 }
5445 else if ( aExp < 0x3FF ) {
5446 if ( aExp || aSig ) float_set_inexact();
5447 return 0;
5448 }
5449 aSig |= LIT64( 0x0010000000000000 );
5450 shiftCount = 0x433 - aExp;
5451 savedASig = aSig;
5452 aSig >>= shiftCount;
5453 z = aSig;
5454 if ( ( aSig<<shiftCount ) != savedASig ) {
5455 float_set_inexact();
5456 }
5457 return z;
5458
5459 }
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
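/*
Single-precision counterpart of the routine above: converts `a' to the
32-bit unsigned integer format, always rounding toward zero.  An input with
the sign bit set raises the invalid exception and returns 0; an input too
large for the format (including +infinity and NaNs with the sign bit clear)
raises invalid and returns 0xFFFFFFFF; any discarded fraction bits raise the
inexact exception.
*/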
5471 uint32 float32_to_uint32_round_to_zero( float32 a )
5472 {
5473 flag aSign;
5474 int16 aExp, shiftCount;
5475 bits32 aSig;
5476 uint32 z;
5477
5478 aSig = extractFloat32Frac( a );
5479 aExp = extractFloat32Exp( a );
5480 aSign = extractFloat32Sign( a );
5481 shiftCount = aExp - 0x9E;
5482
5483 if (aSign) {
5484 float_raise( float_flag_invalid );
5485 return 0;
5486 }
5487 if ( 0 < shiftCount ) {
5488 float_raise( float_flag_invalid );
5489 return 0xFFFFFFFF;
5490 }
5491 else if ( aExp <= 0x7E ) {
5492 if ( aExp | aSig ) float_set_inexact();
5493 return 0;
5494 }
5495 aSig = ( aSig | 0x800000 )<<8;
5496 z = aSig>>( - shiftCount );
5497 if ( aSig<<( shiftCount & 31 ) ) {
5498 float_set_inexact();
5499 }
5500 return z;
5501
5502 }
5503
5504 #endif
5505
5506 #endif