/lib/ode/ode_source/ode/src/stepfast.cpp
C++ | 1143 lines | 816 code | 120 blank | 207 comment | 168 complexity | 21e3dd5c2709ef17758a92cf6579b3b1 MD5 | raw file
1/************************************************************************* 2 * * 3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * 4 * All rights reserved. Email: russ@q12.org Web: www.q12.org * 5 * * 6 * Fast iterative solver, David Whittaker. Email: david@csworkbench.com * 7 * * 8 * This library is free software; you can redistribute it and/or * 9 * modify it under the terms of EITHER: * 10 * (1) The GNU Lesser General Public License as published by the Free * 11 * Software Foundation; either version 2.1 of the License, or (at * 12 * your option) any later version. The text of the GNU Lesser * 13 * General Public License is included with this library in the * 14 * file LICENSE.TXT. * 15 * (2) The BSD-style license that is included with this library in * 16 * the file LICENSE-BSD.TXT. * 17 * * 18 * This library is distributed in the hope that it will be useful, * 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * 21 * LICENSE.TXT and LICENSE-BSD.TXT for more details. * 22 * * 23 *************************************************************************/ 24 25// This is the StepFast code by David Whittaker. This code is faster, but 26// sometimes less stable than, the original "big matrix" code. 27// Refer to the user's manual for more information. 28// Note that this source file duplicates a lot of stuff from step.cpp, 29// eventually we should move the common code to a third file. 30 31#include "objects.h" 32#include "joints/joint.h" 33#include <ode/odeconfig.h> 34#include "config.h" 35#include <ode/objects.h> 36#include <ode/odemath.h> 37#include <ode/rotation.h> 38#include <ode/timer.h> 39#include <ode/error.h> 40#include <ode/matrix.h> 41#include <ode/misc.h> 42#include "lcp.h" 43#include "step.h" 44#include "util.h" 45 46 47// misc defines 48 49#define ALLOCA dALLOCA16 50 51#define RANDOM_JOINT_ORDER 52//#define FAST_FACTOR //use a factorization approximation to the LCP solver (fast, theoretically less accurate) 53#define SLOW_LCP //use the old LCP solver 54//#define NO_ISLANDS //does not perform island creation code (3~4% of simulation time), body disabling doesn't work 55//#define TIMING 56 57 58static int autoEnableDepth = 2; 59 60void dWorldSetAutoEnableDepthSF1 (dxWorld *world, int autodepth) 61{ 62 if (autodepth > 0) 63 autoEnableDepth = autodepth; 64 else 65 autoEnableDepth = 0; 66} 67 68int dWorldGetAutoEnableDepthSF1 (dxWorld *world) 69{ 70 return autoEnableDepth; 71} 72 73//little bit of math.... the _sym_ functions assume the return matrix will be symmetric 74static void 75Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip) 76{ 77 int i, j; 78 dReal sum, *aa, *ad, *bb, *cc; 79 dIASSERT (p > 0 && A && B && C); 80 bb = B; 81 for (i = 0; i < p; i++) 82 { 83 //aa is going accross the matrix, ad down 84 aa = ad = A; 85 cc = C; 86 for (j = i; j < p; j++) 87 { 88 sum = bb[0] * cc[0]; 89 sum += bb[1] * cc[1]; 90 sum += bb[2] * cc[2]; 91 sum += bb[4] * cc[4]; 92 sum += bb[5] * cc[5]; 93 sum += bb[6] * cc[6]; 94 *(aa++) = *ad = sum; 95 ad += Askip; 96 cc += 8; 97 } 98 bb += 8; 99 A += Askip + 1; 100 C += 8; 101 } 102} 103 104static void 105MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip) 106{ 107 int i, j; 108 dReal sum, *aa, *ad, *bb, *cc; 109 dIASSERT (p > 0 && A && B && C); 110 bb = B; 111 for (i = 0; i < p; i++) 112 { 113 //aa is going accross the matrix, ad down 114 aa = ad = A; 115 cc = C; 116 for (j = i; j < p; j++) 117 { 118 sum = bb[0] * cc[0]; 119 sum += bb[1] * cc[1]; 120 sum += bb[2] * cc[2]; 121 sum += bb[4] * cc[4]; 122 sum += bb[5] * cc[5]; 123 sum += bb[6] * cc[6]; 124 *(aa++) += sum; 125 *ad += sum; 126 ad += Askip; 127 cc += 8; 128 } 129 bb += 8; 130 A += Askip + 1; 131 C += 8; 132 } 133} 134 135 136// this assumes the 4th and 8th rows of B are zero. 137 138static void 139Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p) 140{ 141 int i; 142 dIASSERT (p > 0 && A && B && C); 143 dReal sum; 144 for (i = p; i; i--) 145 { 146 sum = B[0] * C[0]; 147 sum += B[1] * C[1]; 148 sum += B[2] * C[2]; 149 sum += B[4] * C[4]; 150 sum += B[5] * C[5]; 151 sum += B[6] * C[6]; 152 *(A++) = sum; 153 B += 8; 154 } 155} 156 157 158// this assumes the 4th and 8th rows of B are zero. 159 160static void 161MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p) 162{ 163 int i; 164 dIASSERT (p > 0 && A && B && C); 165 dReal sum; 166 for (i = p; i; i--) 167 { 168 sum = B[0] * C[0]; 169 sum += B[1] * C[1]; 170 sum += B[2] * C[2]; 171 sum += B[4] * C[4]; 172 sum += B[5] * C[5]; 173 sum += B[6] * C[6]; 174 *(A++) += sum; 175 B += 8; 176 } 177} 178 179 180// this assumes the 4th and 8th rows of B are zero. 181 182static void 183Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q) 184{ 185 int k; 186 dReal sum; 187 dIASSERT (q > 0 && A && B && C); 188 sum = 0; 189 for (k = 0; k < q; k++) 190 sum += B[k * 8] * C[k]; 191 A[0] = sum; 192 sum = 0; 193 for (k = 0; k < q; k++) 194 sum += B[1 + k * 8] * C[k]; 195 A[1] = sum; 196 sum = 0; 197 for (k = 0; k < q; k++) 198 sum += B[2 + k * 8] * C[k]; 199 A[2] = sum; 200 sum = 0; 201 for (k = 0; k < q; k++) 202 sum += B[4 + k * 8] * C[k]; 203 A[4] = sum; 204 sum = 0; 205 for (k = 0; k < q; k++) 206 sum += B[5 + k * 8] * C[k]; 207 A[5] = sum; 208 sum = 0; 209 for (k = 0; k < q; k++) 210 sum += B[6 + k * 8] * C[k]; 211 A[6] = sum; 212} 213 214//**************************************************************************** 215// body rotation 216 217// return sin(x)/x. this has a singularity at 0 so special handling is needed 218// for small arguments. 219 220static inline dReal 221sinc (dReal x) 222{ 223 // if |x| < 1e-4 then use a taylor series expansion. this two term expansion 224 // is actually accurate to one LS bit within this range if double precision 225 // is being used - so don't worry! 226 if (dFabs (x) < 1.0e-4) 227 return REAL (1.0) - x * x * REAL (0.166666666666666666667); 228 else 229 return dSin (x) / x; 230} 231 232#if 0 // this is just dxStepBody() 233// given a body b, apply its linear and angular rotation over the time 234// interval h, thereby adjusting its position and orientation. 235 236static inline void 237moveAndRotateBody (dxBody * b, dReal h) 238{ 239 int j; 240 241 // handle linear velocity 242 for (j = 0; j < 3; j++) 243 b->posr.pos[j] += h * b->lvel[j]; 244 245 if (b->flags & dxBodyFlagFiniteRotation) 246 { 247 dVector3 irv; // infitesimal rotation vector 248 dQuaternion q; // quaternion for finite rotation 249 250 if (b->flags & dxBodyFlagFiniteRotationAxis) 251 { 252 // split the angular velocity vector into a component along the finite 253 // rotation axis, and a component orthogonal to it. 254 dVector3 frv, irv; // finite rotation vector 255 dReal k = dDOT (b->finite_rot_axis, b->avel); 256 frv[0] = b->finite_rot_axis[0] * k; 257 frv[1] = b->finite_rot_axis[1] * k; 258 frv[2] = b->finite_rot_axis[2] * k; 259 irv[0] = b->avel[0] - frv[0]; 260 irv[1] = b->avel[1] - frv[1]; 261 irv[2] = b->avel[2] - frv[2]; 262 263 // make a rotation quaternion q that corresponds to frv * h. 264 // compare this with the full-finite-rotation case below. 265 h *= REAL (0.5); 266 dReal theta = k * h; 267 q[0] = dCos (theta); 268 dReal s = sinc (theta) * h; 269 q[1] = frv[0] * s; 270 q[2] = frv[1] * s; 271 q[3] = frv[2] * s; 272 } 273 else 274 { 275 // make a rotation quaternion q that corresponds to w * h 276 dReal wlen = dSqrt (b->avel[0] * b->avel[0] + b->avel[1] * b->avel[1] + b->avel[2] * b->avel[2]); 277 h *= REAL (0.5); 278 dReal theta = wlen * h; 279 q[0] = dCos (theta); 280 dReal s = sinc (theta) * h; 281 q[1] = b->avel[0] * s; 282 q[2] = b->avel[1] * s; 283 q[3] = b->avel[2] * s; 284 } 285 286 // do the finite rotation 287 dQuaternion q2; 288 dQMultiply0 (q2, q, b->q); 289 for (j = 0; j < 4; j++) 290 b->q[j] = q2[j]; 291 292 // do the infitesimal rotation if required 293 if (b->flags & dxBodyFlagFiniteRotationAxis) 294 { 295 dReal dq[4]; 296 dWtoDQ (irv, b->q, dq); 297 for (j = 0; j < 4; j++) 298 b->q[j] += h * dq[j]; 299 } 300 } 301 else 302 { 303 // the normal way - do an infitesimal rotation 304 dReal dq[4]; 305 dWtoDQ (b->avel, b->q, dq); 306 for (j = 0; j < 4; j++) 307 b->q[j] += h * dq[j]; 308 } 309 310 // normalize the quaternion and convert it to a rotation matrix 311 dNormalize4 (b->q); 312 dQtoR (b->q, b->posr.R); 313 314 // notify all attached geoms that this body has moved 315 for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom)) 316 dGeomMoved (geom); 317} 318#endif 319 320//**************************************************************************** 321//This is an implementation of the iterated/relaxation algorithm. 322//Here is a quick overview of the algorithm per Sergi Valverde's posts to the 323//mailing list: 324// 325// for i=0..N-1 do 326// for c = 0..C-1 do 327// Solve constraint c-th 328// Apply forces to constraint bodies 329// next 330// next 331// Integrate bodies 332 333void 334dInternalStepFast (dxWorld * world, dxBody * body[2], dReal * GI[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize) 335{ 336 int i, j, k; 337# ifdef TIMING 338 dTimerNow ("constraint preprocessing"); 339# endif 340 341 dReal stepsize1 = dRecip (stepsize); 342 343 int m = info.m; 344 // nothing to do if no constraints. 345 if (m <= 0) 346 return; 347 348 int nub = 0; 349 if (info.nub == info.m) 350 nub = m; 351 352 // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same 353 // format as J so we just go through the constraints in J multiplying by 354 // the appropriate scalars and matrices. 355# ifdef TIMING 356 dTimerNow ("compute A"); 357# endif 358 dReal JinvM[2 * 6 * 8]; 359 //dSetZero (JinvM, 2 * m * 8); 360 361 dReal *Jsrc = Jinfo.J1l; 362 dReal *Jdst = JinvM; 363 if (body[0]) 364 { 365 for (j = m - 1; j >= 0; j--) 366 { 367 for (k = 0; k < 3; k++) 368 Jdst[k] = Jsrc[k] * body[0]->invMass; 369 dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]); 370 Jsrc += 8; 371 Jdst += 8; 372 } 373 } 374 if (body[1]) 375 { 376 Jsrc = Jinfo.J2l; 377 Jdst = JinvM + 8 * m; 378 for (j = m - 1; j >= 0; j--) 379 { 380 for (k = 0; k < 3; k++) 381 Jdst[k] = Jsrc[k] * body[1]->invMass; 382 dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]); 383 Jsrc += 8; 384 Jdst += 8; 385 } 386 } 387 388 389 // now compute A = JinvM * J'. 390 int mskip = dPAD (m); 391 dReal A[6 * 8]; 392 //dSetZero (A, 6 * 8); 393 394 if (body[0]) { 395 Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip); 396 if (body[1]) 397 MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l, 398 m, mskip); 399 } else { 400 if (body[1]) 401 Multiply2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l, 402 m, mskip); 403 } 404 405 // add cfm to the diagonal of A 406 for (i = 0; i < m; i++) 407 A[i * mskip + i] += Jinfo.cfm[i] * stepsize1; 408 409 // compute the right hand side `rhs' 410# ifdef TIMING 411 dTimerNow ("compute rhs"); 412# endif 413 dReal tmp1[16]; 414 //dSetZero (tmp1, 16); 415 // put v/h + invM*fe into tmp1 416 for (i = 0; i < 2; i++) 417 { 418 if (!body[i]) 419 continue; 420 for (j = 0; j < 3; j++) 421 tmp1[i * 8 + j] = body[i]->facc[j] * body[i]->invMass + body[i]->lvel[j] * stepsize1; 422 dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc); 423 for (j = 0; j < 3; j++) 424 tmp1[i * 8 + 4 + j] += body[i]->avel[j] * stepsize1; 425 } 426 // put J*tmp1 into rhs 427 dReal rhs[6]; 428 //dSetZero (rhs, 6); 429 430 if (body[0]) { 431 Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m); 432 if (body[1]) 433 MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m); 434 } else { 435 if (body[1]) 436 Multiply0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m); 437 } 438 439 // complete rhs 440 for (i = 0; i < m; i++) 441 rhs[i] = Jinfo.c[i] * stepsize1 - rhs[i]; 442 443#ifdef SLOW_LCP 444 // solve the LCP problem and get lambda. 445 // this will destroy A but that's okay 446# ifdef TIMING 447 dTimerNow ("solving LCP problem"); 448# endif 449 dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal)); 450 dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal)); 451 dReal lo[6], hi[6]; 452 memcpy (lo, Jinfo.lo, m * sizeof (dReal)); 453 memcpy (hi, Jinfo.hi, m * sizeof (dReal)); 454 dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex); 455#endif 456 457 // LCP Solver replacement: 458 // This algorithm goes like this: 459 // Do a straightforward LDLT factorization of the matrix A, solving for 460 // A*x = rhs 461 // For each x[i] that is outside of the bounds of lo[i] and hi[i], 462 // clamp x[i] into that range. 463 // Substitute into A the now known x's 464 // subtract the residual away from the rhs. 465 // Remove row and column i from L, updating the factorization 466 // place the known x's at the end of the array, keeping up with location in p 467 // Repeat until all constraints have been clamped or all are within bounds 468 // 469 // This is probably only faster in the single joint case where only one repeat is 470 // the norm. 471 472#ifdef FAST_FACTOR 473 // factorize A (L*D*L'=A) 474# ifdef TIMING 475 dTimerNow ("factorize A"); 476# endif 477 dReal d[6]; 478 dReal L[6 * 8]; 479 memcpy (L, A, m * mskip * sizeof (dReal)); 480 dFactorLDLT (L, d, m, mskip); 481 482 // compute lambda 483# ifdef TIMING 484 dTimerNow ("compute lambda"); 485# endif 486 487 int left = m; //constraints left to solve. 488 int remove[6]; 489 dReal lambda[6]; 490 dReal x[6]; 491 int p[6]; 492 for (i = 0; i < 6; i++) 493 p[i] = i; 494 while (true) 495 { 496 memcpy (x, rhs, left * sizeof (dReal)); 497 dSolveLDLT (L, d, x, left, mskip); 498 499 int fixed = 0; 500 for (i = 0; i < left; i++) 501 { 502 j = p[i]; 503 remove[i] = false; 504 // This isn't the exact same use of findex as dSolveLCP.... since x[findex] 505 // may change after I've already clamped x[i], but it should be close 506 if (Jinfo.findex[j] > -1) 507 { 508 dReal f = fabs (Jinfo.hi[j] * x[p[Jinfo.findex[j]]]); 509 if (x[i] > f) 510 x[i] = f; 511 else if (x[i] < -f) 512 x[i] = -f; 513 else 514 continue; 515 } 516 else 517 { 518 if (x[i] > Jinfo.hi[j]) 519 x[i] = Jinfo.hi[j]; 520 else if (x[i] < Jinfo.lo[j]) 521 x[i] = Jinfo.lo[j]; 522 else 523 continue; 524 } 525 remove[i] = true; 526 fixed++; 527 } 528 if (fixed == 0 || fixed == left) //no change or all constraints solved 529 break; 530 531 for (i = 0; i < left; i++) //sub in to right hand side. 532 if (remove[i]) 533 for (j = 0; j < left; j++) 534 if (!remove[j]) 535 rhs[j] -= A[j * mskip + i] * x[i]; 536 537 for (int r = left - 1; r >= 0; r--) //eliminate row/col for fixed variables 538 { 539 if (remove[r]) 540 { 541 //dRemoveLDLT adapted for use without row pointers. 542 if (r == left - 1) 543 { 544 left--; 545 continue; // deleting last row/col is easy 546 } 547 else if (r == 0) 548 { 549 dReal a[6]; 550 for (i = 0; i < left; i++) 551 a[i] = -A[i * mskip]; 552 a[0] += REAL (1.0); 553 dLDLTAddTL (L, d, a, left, mskip); 554 } 555 else 556 { 557 dReal t[6]; 558 dReal a[6]; 559 for (i = 0; i < r; i++) 560 t[i] = L[r * mskip + i] / d[i]; 561 for (i = 0; i < left - r; i++) 562 a[i] = dDot (L + (r + i) * mskip, t, r) - A[(r + i) * mskip + r]; 563 a[0] += REAL (1.0); 564 dLDLTAddTL (L + r * mskip + r, d + r, a, left - r, mskip); 565 } 566 567 dRemoveRowCol (L, left, mskip, r); 568 //end dRemoveLDLT 569 570 left--; 571 if (r < (left - 1)) 572 { 573 dReal tx = x[r]; 574 memmove (d + r, d + r + 1, (left - r) * sizeof (dReal)); 575 memmove (rhs + r, rhs + r + 1, (left - r) * sizeof (dReal)); 576 //x will get written over by rhs anyway, no need to move it around 577 //just store the fixed value we just discovered in it. 578 x[left] = tx; 579 for (i = 0; i < m; i++) 580 if (p[i] > r && p[i] <= left) 581 p[i]--; 582 p[r] = left; 583 } 584 } 585 } 586 } 587 588 for (i = 0; i < m; i++) 589 lambda[i] = x[p[i]]; 590# endif 591 // compute the constraint force `cforce' 592# ifdef TIMING 593 dTimerNow ("compute constraint force"); 594#endif 595 596 // compute cforce = J'*lambda 597 dJointFeedback *fb = joint->feedback; 598 dReal cforce[16]; 599 //dSetZero (cforce, 16); 600 601 if (fb) 602 { 603 // the user has requested feedback on the amount of force that this 604 // joint is applying to the bodies. we use a slightly slower 605 // computation that splits out the force components and puts them 606 // in the feedback structure. 607 dReal data1[8], data2[8]; 608 if (body[0]) 609 { 610 Multiply1_8q1 (data1, Jinfo.J1l, lambda, m); 611 dReal *cf1 = cforce; 612 cf1[0] = (fb->f1[0] = data1[0]); 613 cf1[1] = (fb->f1[1] = data1[1]); 614 cf1[2] = (fb->f1[2] = data1[2]); 615 cf1[4] = (fb->t1[0] = data1[4]); 616 cf1[5] = (fb->t1[1] = data1[5]); 617 cf1[6] = (fb->t1[2] = data1[6]); 618 } 619 if (body[1]) 620 { 621 Multiply1_8q1 (data2, Jinfo.J2l, lambda, m); 622 dReal *cf2 = cforce + 8; 623 cf2[0] = (fb->f2[0] = data2[0]); 624 cf2[1] = (fb->f2[1] = data2[1]); 625 cf2[2] = (fb->f2[2] = data2[2]); 626 cf2[4] = (fb->t2[0] = data2[4]); 627 cf2[5] = (fb->t2[1] = data2[5]); 628 cf2[6] = (fb->t2[2] = data2[6]); 629 } 630 } 631 else 632 { 633 // no feedback is required, let's compute cforce the faster way 634 if (body[0]) 635 Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m); 636 if (body[1]) 637 Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m); 638 } 639 640 for (i = 0; i < 2; i++) 641 { 642 if (!body[i]) 643 continue; 644 for (j = 0; j < 3; j++) 645 { 646 body[i]->facc[j] += cforce[i * 8 + j]; 647 body[i]->tacc[j] += cforce[i * 8 + 4 + j]; 648 } 649 } 650} 651 652void 653dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations) 654{ 655# ifdef TIMING 656 dTimerNow ("preprocessing"); 657# endif 658 dxBody *bodyPair[2], *body; 659 dReal *GIPair[2], *GinvIPair[2]; 660 dxJoint *joint; 661 int iter, b, j, i; 662 dReal ministep = stepsize / maxiterations; 663 664 // make a local copy of the joint array, because we might want to modify it. 665 // (the "dxJoint *const*" declaration says we're allowed to modify the joints 666 // but not the joint array, because the caller might need it unchanged). 667 dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *)); 668 memcpy (joints, _joints, nj * sizeof (dxJoint *)); 669 670 // get m = total constraint dimension, nub = number of unbounded variables. 671 // create constraint offset array and number-of-rows array for all joints. 672 // the constraints are re-ordered as follows: the purely unbounded 673 // constraints, the mixed unbounded + LCP constraints, and last the purely 674 // LCP constraints. this assists the LCP solver to put all unbounded 675 // variables at the start for a quick factorization. 676 // 677 // joints with m=0 are inactive and are removed from the joints array 678 // entirely, so that the code that follows does not consider them. 679 // also number all active joints in the joint list (set their tag values). 680 // inactive joints receive a tag value of -1. 681 682 int m = 0; 683 dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1)); 684 int *ofs = (int *) ALLOCA (nj * sizeof (int)); 685 for (i = 0, j = 0; j < nj; j++) 686 { // i=dest, j=src 687 joints[j]->getInfo1 (info + i); 688 dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m); 689 if (info[i].m > 0) 690 { 691 joints[i] = joints[j]; 692 joints[i]->tag = i; 693 i++; 694 } 695 else 696 { 697 joints[j]->tag = -1; 698 } 699 } 700 nj = i; 701 702 // the purely unbounded constraints 703 for (i = 0; i < nj; i++) 704 { 705 ofs[i] = m; 706 m += info[i].m; 707 } 708 dReal *c = NULL; 709 dReal *cfm = NULL; 710 dReal *lo = NULL; 711 dReal *hi = NULL; 712 int *findex = NULL; 713 714 dReal *J = NULL; 715 dxJoint::Info2 * Jinfo = NULL; 716 717 if (m) 718 { 719 // create a constraint equation right hand side vector `c', a constraint 720 // force mixing vector `cfm', and LCP low and high bound vectors, and an 721 // 'findex' vector. 722 c = (dReal *) ALLOCA (m * sizeof (dReal)); 723 cfm = (dReal *) ALLOCA (m * sizeof (dReal)); 724 lo = (dReal *) ALLOCA (m * sizeof (dReal)); 725 hi = (dReal *) ALLOCA (m * sizeof (dReal)); 726 findex = (int *) ALLOCA (m * sizeof (int)); 727 dSetZero (c, m); 728 dSetValue (cfm, m, world->global_cfm); 729 dSetValue (lo, m, -dInfinity); 730 dSetValue (hi, m, dInfinity); 731 for (i = 0; i < m; i++) 732 findex[i] = -1; 733 734 // get jacobian data from constraints. a (2*m)x8 matrix will be created 735 // to store the two jacobian blocks from each constraint. it has this 736 // format: 737 // 738 // l l l 0 a a a 0 \ . 739 // l l l 0 a a a 0 }-- jacobian body 1 block for joint 0 (3 rows) 740 // l l l 0 a a a 0 / 741 // l l l 0 a a a 0 \ . 742 // l l l 0 a a a 0 }-- jacobian body 2 block for joint 0 (3 rows) 743 // l l l 0 a a a 0 / 744 // l l l 0 a a a 0 }--- jacobian body 1 block for joint 1 (1 row) 745 // l l l 0 a a a 0 }--- jacobian body 2 block for joint 1 (1 row) 746 // etc... 747 // 748 // (lll) = linear jacobian data 749 // (aaa) = angular jacobian data 750 // 751# ifdef TIMING 752 dTimerNow ("create J"); 753# endif 754 J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal)); 755 dSetZero (J, 2 * m * 8); 756 Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2)); 757 for (i = 0; i < nj; i++) 758 { 759 Jinfo[i].rowskip = 8; 760 Jinfo[i].fps = dRecip (stepsize); 761 Jinfo[i].erp = world->global_erp; 762 Jinfo[i].J1l = J + 2 * 8 * ofs[i]; 763 Jinfo[i].J1a = Jinfo[i].J1l + 4; 764 Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m; 765 Jinfo[i].J2a = Jinfo[i].J2l + 4; 766 Jinfo[i].c = c + ofs[i]; 767 Jinfo[i].cfm = cfm + ofs[i]; 768 Jinfo[i].lo = lo + ofs[i]; 769 Jinfo[i].hi = hi + ofs[i]; 770 Jinfo[i].findex = findex + ofs[i]; 771 //joints[i]->getInfo2 (Jinfo+i); 772 } 773 774 } 775 776 dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal)); 777 dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal)); 778 dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal)); 779 dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal)); 780 for (b = 0; b < nb; b++) 781 { 782 for (i = 0; i < 4; i++) 783 { 784 saveFacc[b * 4 + i] = bodies[b]->facc[i]; 785 saveTacc[b * 4 + i] = bodies[b]->tacc[i]; 786 } 787 bodies[b]->tag = b; 788 } 789 790 for (iter = 0; iter < maxiterations; iter++) 791 { 792# ifdef TIMING 793 dTimerNow ("applying inertia and gravity"); 794# endif 795 dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 796 797 for (b = 0; b < nb; b++) 798 { 799 body = bodies[b]; 800 801 // for all bodies, compute the inertia tensor and its inverse in the global 802 // frame, and compute the rotational force and add it to the torque 803 // accumulator. I and invI are vertically stacked 3x4 matrices, one per body. 804 // @@@ check computation of rotational force. 805 806 // compute inertia tensor in global frame 807 dMULTIPLY2_333 (tmp, body->mass.I, body->posr.R); 808 dMULTIPLY0_333 (globalI + b * 12, body->posr.R, tmp); 809 // compute inverse inertia tensor in global frame 810 dMULTIPLY2_333 (tmp, body->invI, body->posr.R); 811 dMULTIPLY0_333 (globalInvI + b * 12, body->posr.R, tmp); 812 813 for (i = 0; i < 4; i++) 814 body->tacc[i] = saveTacc[b * 4 + i]; 815 816 if (body->flags & dxBodyGyroscopic) { 817 // DanielKO: this doesn't look right/efficient, but anyways... 818 // compute rotational force 819 dMULTIPLY0_331 (tmp, globalI + b * 12, body->avel); 820 dCROSS (body->tacc, -=, body->avel, tmp); 821 } 822 823 // add the gravity force to all bodies 824 if ((body->flags & dxBodyNoGravity) == 0) 825 { 826 body->facc[0] = saveFacc[b * 4 + 0] + body->mass.mass * world->gravity[0]; 827 body->facc[1] = saveFacc[b * 4 + 1] + body->mass.mass * world->gravity[1]; 828 body->facc[2] = saveFacc[b * 4 + 2] + body->mass.mass * world->gravity[2]; 829 body->facc[3] = 0; 830 } else { 831 body->facc[0] = saveFacc[b * 4 + 0]; 832 body->facc[1] = saveFacc[b * 4 + 1]; 833 body->facc[2] = saveFacc[b * 4 + 2]; 834 body->facc[3] = 0; 835 } 836 837 } 838 839#ifdef RANDOM_JOINT_ORDER 840#ifdef TIMING 841 dTimerNow ("randomizing joint order"); 842#endif 843 //randomize the order of the joints by looping through the array 844 //and swapping the current joint pointer with a random one before it. 845 for (j = 0; j < nj; j++) 846 { 847 joint = joints[j]; 848 dxJoint::Info1 i1 = info[j]; 849 dxJoint::Info2 i2 = Jinfo[j]; 850 const int r = dRandInt(j+1); 851 dIASSERT (r < nj); 852 joints[j] = joints[r]; 853 info[j] = info[r]; 854 Jinfo[j] = Jinfo[r]; 855 joints[r] = joint; 856 info[r] = i1; 857 Jinfo[r] = i2; 858 } 859#endif 860 861 //now iterate through the random ordered joint array we created. 862 for (j = 0; j < nj; j++) 863 { 864#ifdef TIMING 865 dTimerNow ("setting up joint"); 866#endif 867 joint = joints[j]; 868 bodyPair[0] = joint->node[0].body; 869 bodyPair[1] = joint->node[1].body; 870 871 if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled)) 872 bodyPair[0] = 0; 873 if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled)) 874 bodyPair[1] = 0; 875 876 //if this joint is not connected to any enabled bodies, skip it. 877 if (!bodyPair[0] && !bodyPair[1]) 878 continue; 879 880 if (bodyPair[0]) 881 { 882 GIPair[0] = globalI + bodyPair[0]->tag * 12; 883 GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12; 884 } 885 if (bodyPair[1]) 886 { 887 GIPair[1] = globalI + bodyPair[1]->tag * 12; 888 GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12; 889 } 890 891 joints[j]->getInfo2 (Jinfo + j); 892 893 //dInternalStepIslandFast is an exact copy of the old routine with one 894 //modification: the calculated forces are added back to the facc and tacc 895 //vectors instead of applying them to the bodies and moving them. 896 if (info[j].m > 0) 897 { 898 dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep); 899 } 900 } 901 // } 902# ifdef TIMING 903 dTimerNow ("moving bodies"); 904# endif 905 //Now we can simulate all the free floating bodies, and move them. 906 for (b = 0; b < nb; b++) 907 { 908 body = bodies[b]; 909 910 for (i = 0; i < 4; i++) 911 { 912 body->facc[i] *= ministep; 913 body->tacc[i] *= ministep; 914 } 915 916 //apply torque 917 dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc); 918 919 //apply force 920 for (i = 0; i < 3; i++) 921 body->lvel[i] += body->invMass * body->facc[i]; 922 923 //move It! 924 dxStepBody (body, ministep); 925 } 926 } 927 for (b = 0; b < nb; b++) 928 for (j = 0; j < 4; j++) 929 bodies[b]->facc[j] = bodies[b]->tacc[j] = 0; 930} 931 932 933#ifdef NO_ISLANDS 934 935// Since the iterative algorithm doesn't care about islands of bodies, this is a 936// faster algorithm that just sends it all the joints and bodies in one array. 937// It's downfall is it's inability to handle disabled bodies as well as the old one. 938static void 939processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations) 940{ 941 // nothing to do if no bodies 942 if (world->nb <= 0) 943 return; 944 945 dInternalHandleAutoDisabling (world,stepsize); 946 947# ifdef TIMING 948 dTimerStart ("creating joint and body arrays"); 949# endif 950 dxBody **bodies, *body; 951 dxJoint **joints, *joint; 952 joints = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *)); 953 bodies = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *)); 954 955 int nj = 0; 956 for (joint = world->firstjoint; joint; joint = (dxJoint *) joint->next) 957 joints[nj++] = joint; 958 959 int nb = 0; 960 for (body = world->firstbody; body; body = (dxBody *) body->next) 961 bodies[nb++] = body; 962 963 dInternalStepIslandFast (world, bodies, nb, joints, nj, stepsize, maxiterations); 964# ifdef TIMING 965 dTimerEnd (); 966 dTimerReport (stdout, 1); 967# endif 968} 969 970#else 971 972//**************************************************************************** 973// island processing 974 975// this groups all joints and bodies in a world into islands. all objects 976// in an island are reachable by going through connected bodies and joints. 977// each island can be simulated separately. 978// note that joints that are not attached to anything will not be included 979// in any island, an so they do not affect the simulation. 980// 981// this function starts new island from unvisited bodies. however, it will 982// never start a new islands from a disabled body. thus islands of disabled 983// bodies will not be included in the simulation. disabled bodies are 984// re-enabled if they are found to be part of an active island. 985 986static void 987processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations) 988{ 989#ifdef TIMING 990 dTimerStart ("Island Setup"); 991#endif 992 dxBody *b, *bb, **body; 993 dxJoint *j, **joint; 994 995 // nothing to do if no bodies 996 if (world->nb <= 0) 997 return; 998 999 dInternalHandleAutoDisabling (world,stepsize); 1000 1001 // make arrays for body and joint lists (for a single island) to go into 1002 body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *)); 1003 joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *)); 1004 int bcount = 0; // number of bodies in `body' 1005 int jcount = 0; // number of joints in `joint' 1006 int tbcount = 0; 1007 int tjcount = 0; 1008 1009 // set all body/joint tags to 0 1010 for (b = world->firstbody; b; b = (dxBody *) b->next) 1011 b->tag = 0; 1012 for (j = world->firstjoint; j; j = (dxJoint *) j->next) 1013 j->tag = 0; 1014 1015 // allocate a stack of unvisited bodies in the island. the maximum size of 1016 // the stack can be the lesser of the number of bodies or joints, because 1017 // new bodies are only ever added to the stack by going through untagged 1018 // joints. all the bodies in the stack must be tagged! 1019 int stackalloc = (world->nj < world->nb) ? world->nj : world->nb; 1020 dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *)); 1021 int *autostack = (int *) ALLOCA (stackalloc * sizeof (int)); 1022 1023 for (bb = world->firstbody; bb; bb = (dxBody *) bb->next) 1024 { 1025#ifdef TIMING 1026 dTimerNow ("Island Processing"); 1027#endif 1028 // get bb = the next enabled, untagged body, and tag it 1029 if (bb->tag || (bb->flags & dxBodyDisabled) || (bb->invMass == 0)) 1030 continue; 1031 bb->tag = 1; 1032 1033 // tag all bodies and joints starting from bb. 1034 int stacksize = 0; 1035 int autoDepth = autoEnableDepth; 1036 b = bb; 1037 body[0] = bb; 1038 bcount = 1; 1039 jcount = 0; 1040 goto quickstart; 1041 while (stacksize > 0) 1042 { 1043 b = stack[--stacksize]; // pop body off stack 1044 autoDepth = autostack[stacksize]; 1045 body[bcount++] = b; // put body on body list 1046 quickstart: 1047 1048 // traverse and tag all body's joints, add untagged connected bodies 1049 // to stack 1050 for (dxJointNode * n = b->firstjoint; n; n = n->next) 1051 { 1052 if (!n->joint->tag) 1053 { 1054 int thisDepth = autoEnableDepth; 1055 n->joint->tag = 1; 1056 joint[jcount++] = n->joint; 1057 if (n->body && !n->body->tag) 1058 { 1059 if (n->body->flags & dxBodyDisabled) 1060 thisDepth = autoDepth - 1; 1061 if (thisDepth < 0) 1062 continue; 1063 n->body->flags &= ~dxBodyDisabled; 1064 n->body->tag = 1; 1065 autostack[stacksize] = thisDepth; 1066 stack[stacksize++] = n->body; 1067 } 1068 } 1069 } 1070 dIASSERT (stacksize <= world->nb); 1071 dIASSERT (stacksize <= world->nj); 1072 } 1073 1074 // now do something with body and joint lists 1075 dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations); 1076 1077 // what we've just done may have altered the body/joint tag values. 1078 // we must make sure that these tags are nonzero. 1079 // also make sure all bodies are in the enabled state. 1080 int i; 1081 for (i = 0; i < bcount; i++) 1082 { 1083 body[i]->tag = 1; 1084 body[i]->flags &= ~dxBodyDisabled; 1085 } 1086 for (i = 0; i < jcount; i++) 1087 joint[i]->tag = 1; 1088 1089 tbcount += bcount; 1090 tjcount += jcount; 1091 } 1092 1093#ifdef TIMING 1094 dMessage(0, "Total joints processed: %i, bodies: %i", tjcount, tbcount); 1095#endif 1096 1097 // if debugging, check that all objects (except for disabled bodies, 1098 // unconnected joints, and joints that are connected to disabled bodies) 1099 // were tagged. 1100# ifndef dNODEBUG 1101 for (b = world->firstbody; b; b = (dxBody *) b->next) 1102 { 1103 if (b->flags & dxBodyDisabled) 1104 { 1105 if (b->tag) 1106 dDebug (0, "disabled body tagged"); 1107 } 1108 else 1109 { 1110 if (!b->tag) 1111 dDebug (0, "enabled body not tagged"); 1112 } 1113 } 1114 for (j = world->firstjoint; j; j = (dxJoint *) j->next) 1115 { 1116 if ((j->node[0].body && (j->node[0].body->flags & dxBodyDisabled) == 0) || (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled) == 0)) 1117 { 1118 if (!j->tag) 1119 dDebug (0, "attached enabled joint not tagged"); 1120 } 1121 else 1122 { 1123 if (j->tag) 1124 dDebug (0, "unattached or disabled joint tagged"); 1125 } 1126 } 1127# endif 1128 1129# ifdef TIMING 1130 dTimerEnd (); 1131 dTimerReport (stdout, 1); 1132# endif 1133} 1134 1135#endif 1136 1137 1138void dWorldStepFast1 (dWorldID w, dReal stepsize, int maxiterations) 1139{ 1140 dUASSERT (w, "bad world argument"); 1141 dUASSERT (stepsize > 0, "stepsize must be > 0"); 1142 processIslandsFast (w, stepsize, maxiterations); 1143}