/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From:
 *	@(#)muldi3.c	8.1 (Berkeley) 6/4/93
 *	NetBSD: muldi3.c,v 1.1 2005/12/20 19:28:51 christos Exp
 */

#include "longlong.h"

/*
 * Multiply two long longs.
 *
 * Our algorithm is based on the following.  Split incoming long long
 * values u and v (where u,v >= 0) into
 *
 *	u = 2^n u1 + u0	(n = number of bits in `unsigned int', usu. 32)
 *
 * and
 *
 *	v = 2^n v1 + v0
 *
 * Then
 *
 *	uv = 2^2n u1 v1  +  2^n u1 v0  +  2^n v1 u0  +  u0 v0
 *	   = 2^2n u1 v1  +  2^n (u1 v0 + v1 u0)  +  u0 v0
 *
 * Now add 2^n u1 v1 to the first term and subtract it from the middle,
 * and add 2^n u0 v0 to the last term and subtract it from the middle.
 * This gives:
 *
 *	uv = (2^2n + 2^n) (u1 v1)  +
 *	     (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0)  +
 *	     (2^n + 1) (u0 v0)
 *
 * Factoring the middle a bit gives us:
 *
 *	uv = (2^2n + 2^n) (u1 v1)  +		[u1v1 = high]
 *	     (2^n) (u1 - u0) (v0 - v1)  +	[(u1-u0)... = mid]
 *	     (2^n + 1) (u0 v0)			[u0v0 = low]
 *
 * The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done
 * in just half the precision of the original.  (Note that either or both
 * of (u1 - u0) or (v0 - v1) may be negative.)
 *
 * This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
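 *
 * As a quick sanity check (a worked example added here, not part of the
 * original derivation), take n = 4 and u = 0xab, v = 0xcd, so that
 * u1 = 0xa, u0 = 0xb, v1 = 0xc, v0 = 0xd.  Then
 *
 *	high = u1 v1 = 120
 *	mid  = (u1 - u0) (v0 - v1) = (-1)(1) = -1
 *	low  = u0 v0 = 143
 *
 * and uv = (2^8 + 2^4)(120) + (2^4)(-1) + (2^4 + 1)(143)
 *        = 32640 - 16 + 2431 = 35055 = 0xab * 0xcd, as expected.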
 *
 * Since C does not give us a `int * int = long long' operator, we split
 * our input long longs into two ints, then split the two ints into two
 * shorts.  We can then calculate `short * short = int' in native
 * arithmetic.
 *
 * Our product should, strictly speaking, be a `long long long', with
 * 128 bits, but we are going to discard the upper 64.  In other words,
 * we are not interested in uv, but rather in (uv mod 2^2n).  This
 * makes some of the terms above vanish, and we get:
 *
 *	(2^n)(high) + (2^n)(mid) + (2^n + 1)(low)
 *
 * or
 *
 *	(2^n)(high + mid + low) + low
 *
 * Furthermore, `high' and `mid' can be computed mod 2^n, as any factor
 * of 2^n in either one will also vanish.  Only `low' need be computed
 * mod 2^2n, and only because of the final term above.
 */
static long long __lmulq(unsigned int, unsigned int);

long long
__muldi3(long long a, long long b)
{
	union uu u, v, low, prod;
	unsigned int high, mid, udiff, vdiff;
	int negall, negmid;
#define	u1	u.ui[H]
#define	u0	u.ui[L]
#define	v1	v.ui[H]
#define	v0	v.ui[L]

	/*
	 * Get u and v such that u, v >= 0.  When this is finished,
	 * u1, u0, v1, and v0 will be directly accessible through the
	 * int fields.
	 */
	if (a >= 0)
		u.ll = a, negall = 0;
	else
		u.ll = -a, negall = 1;
	if (b >= 0)
		v.ll = b;
	else
		v.ll = -b, negall ^= 1;

	if (u1 == 0 && v1 == 0) {
		/*
		 * An (I hope) important optimization occurs when u1 and v1
		 * are both 0.  This should be common since most numbers
		 * are small.  Here the product is just u0*v0.
		 */
		prod.ll = __lmulq(u0, v0);
	} else {
		/*
		 * Compute the three intermediate products, remembering
		 * whether the middle term is negative.  We can discard
		 * any upper bits in high and mid, so we can use native
		 * unsigned int * unsigned int => unsigned int arithmetic.
		 */
		low.ll = __lmulq(u0, v0);

		if (u1 >= u0)
			negmid = 0, udiff = u1 - u0;
		else
			negmid = 1, udiff = u0 - u1;
		if (v0 >= v1)
			vdiff = v0 - v1;
		else
			vdiff = v1 - v0, negmid ^= 1;
		mid = udiff * vdiff;

		high = u1 * v1;

		/*
		 * Assemble the final product.
		 */
		prod.ui[H] = high + (negmid ? -mid : mid) + low.ui[L] +
		    low.ui[H];
		prod.ui[L] = low.ui[L];
	}
	return (negall ? -prod.ll : prod.ll);
#undef u1
#undef u0
#undef v1
#undef v0
}

/*
 * Multiply two 2N-bit ints to produce a 4N-bit long long, where N is
 * half the number of bits in an int (whatever that is---the code
 * below does not care as long as the header file does its part of the
 * bargain---but typically N==16).
 *
 * We use the same algorithm from Knuth, but this time the modulo refinement
 * does not apply.  On the other hand, since N is half the size of an int,
 * we can get away with native multiplication---none of our input terms
 * exceeds (UINT_MAX >> 1).
 *
 * Note that, for unsigned int l, the quad-precision (long long) result
 *
 *	l << N
 *
 * splits into high and low ints as HHALF(l) and LHUP(l) respectively.
 */
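/*
 * HHALF, LHALF, and LHUP come from longlong.h, as do `union uu' and the
 * H/L word indices.  As a rough sketch only (the header owns the real
 * definitions), the macros are presumably along these lines:
 *
 *	#define	HHALF(x)	((x) >> HALF_BITS)		upper N bits
 *	#define	LHALF(x)	((x) & ((1U << HALF_BITS) - 1))	lower N bits
 *	#define	LHUP(x)		((x) << HALF_BITS)		lower half, moved up
 *
 * where HALF_BITS is N, half the number of bits in an unsigned int.
 */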
static long long
__lmulq(unsigned int u, unsigned int v)
{
	unsigned int u1, u0, v1, v0, udiff, vdiff, high, mid, low;
	unsigned int prodh, prodl, was;
	union uu prod;
	int neg;

	u1 = HHALF(u);
	u0 = LHALF(u);
	v1 = HHALF(v);
	v0 = LHALF(v);

	low = u0 * v0;

	/* This is the same small-number optimization as before. */
	if (u1 == 0 && v1 == 0)
		return (low);

	if (u1 >= u0)
		udiff = u1 - u0, neg = 0;
	else
		udiff = u0 - u1, neg = 1;
	if (v0 >= v1)
		vdiff = v0 - v1;
	else
		vdiff = v1 - v0, neg ^= 1;
	mid = udiff * vdiff;

	high = u1 * v1;

	/* prod = (high << 2N) + (high << N); */
	prodh = high + HHALF(high);
	prodl = LHUP(high);

	/* if (neg) prod -= mid << N; else prod += mid << N; */
	if (neg) {
		was = prodl;
		prodl -= LHUP(mid);
		prodh -= HHALF(mid) + (prodl > was);
	} else {
		was = prodl;
		prodl += LHUP(mid);
		prodh += HHALF(mid) + (prodl < was);
	}

	/* prod += low << N */
	was = prodl;
	prodl += LHUP(low);
	prodh += HHALF(low) + (prodl < was);
	/* ... + low; */
	if ((prodl += low) < low)
		prodh++;

	/* return 4N-bit product */
	prod.ui[H] = prodh;
	prod.ui[L] = prodl;
	return (prod.ll);
}
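
#ifdef MULDI3_EXAMPLE
/*
 * Minimal usage sketch, not part of the original file: build this
 * translation unit with -DMULDI3_EXAMPLE on a hosted system to exercise
 * __muldi3 directly.  The guard name is arbitrary; only <stdio.h> is
 * assumed.  Since 123456789 * 987654321 = 121932631112635269, the call
 * below should print -121932631112635269 twice and exit 0.
 */
#include <stdio.h>

int
main(void)
{
	long long p = __muldi3(123456789LL, -987654321LL);

	printf("__muldi3(123456789, -987654321) = %lld\n", p);
	printf("expected                        = %lld\n",
	    -121932631112635269LL);
	return (p == -121932631112635269LL ? 0 : 1);
}
#endif /* MULDI3_EXAMPLE */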