PageRenderTime 325ms CodeModel.GetById 27ms app.highlight 288ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/ode/ode_source/ode/src/stepfast.cpp

http://narutortsproject.googlecode.com/
C++ | 1143 lines | 816 code | 120 blank | 207 comment | 168 complexity | 21e3dd5c2709ef17758a92cf6579b3b1 MD5 | raw file
   1/*************************************************************************
   2 *                                                                       *
   3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
   4 * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
   5 *                                                                       *
   6 * Fast iterative solver, David Whittaker. Email: david@csworkbench.com  *
   7 *                                                                       *
   8 * This library is free software; you can redistribute it and/or         *
   9 * modify it under the terms of EITHER:                                  *
  10 *   (1) The GNU Lesser General Public License as published by the Free  *
  11 *       Software Foundation; either version 2.1 of the License, or (at  *
  12 *       your option) any later version. The text of the GNU Lesser      *
  13 *       General Public License is included with this library in the     *
  14 *       file LICENSE.TXT.                                               *
  15 *   (2) The BSD-style license that is included with this library in     *
  16 *       the file LICENSE-BSD.TXT.                                       *
  17 *                                                                       *
  18 * This library is distributed in the hope that it will be useful,       *
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
  21 * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
  22 *                                                                       *
  23 *************************************************************************/
  24
  25// This is the StepFast code by David Whittaker. This code is faster, but
  26// sometimes less stable than, the original "big matrix" code.
  27// Refer to the user's manual for more information.
  28// Note that this source file duplicates a lot of stuff from step.cpp,
  29// eventually we should move the common code to a third file.
  30
  31#include "objects.h"
  32#include "joints/joint.h"
  33#include <ode/odeconfig.h>
  34#include "config.h"
  35#include <ode/objects.h>
  36#include <ode/odemath.h>
  37#include <ode/rotation.h>
  38#include <ode/timer.h>
  39#include <ode/error.h>
  40#include <ode/matrix.h>
  41#include <ode/misc.h>
  42#include "lcp.h"
  43#include "step.h"
  44#include "util.h"
  45
  46
  47// misc defines
  48
  49#define ALLOCA dALLOCA16
  50
  51#define RANDOM_JOINT_ORDER
  52//#define FAST_FACTOR	//use a factorization approximation to the LCP solver (fast, theoretically less accurate)
  53#define SLOW_LCP      //use the old LCP solver
  54//#define NO_ISLANDS    //does not perform island creation code (3~4% of simulation time), body disabling doesn't work
  55//#define TIMING
  56
  57
  58static int autoEnableDepth = 2;
  59
  60void dWorldSetAutoEnableDepthSF1 (dxWorld *world, int autodepth)
  61{
  62	if (autodepth > 0)
  63		autoEnableDepth = autodepth;
  64	else
  65		autoEnableDepth = 0;
  66}
  67
  68int dWorldGetAutoEnableDepthSF1 (dxWorld *world)
  69{
  70	return autoEnableDepth;
  71}
  72
  73//little bit of math.... the _sym_ functions assume the return matrix will be symmetric
  74static void
  75Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
  76{
  77	int i, j;
  78	dReal sum, *aa, *ad, *bb, *cc;
  79	dIASSERT (p > 0 && A && B && C);
  80	bb = B;
  81	for (i = 0; i < p; i++)
  82	{
  83		//aa is going accross the matrix, ad down
  84		aa = ad = A;
  85		cc = C;
  86		for (j = i; j < p; j++)
  87		{
  88			sum = bb[0] * cc[0];
  89			sum += bb[1] * cc[1];
  90			sum += bb[2] * cc[2];
  91			sum += bb[4] * cc[4];
  92			sum += bb[5] * cc[5];
  93			sum += bb[6] * cc[6];
  94			*(aa++) = *ad = sum;
  95			ad += Askip;
  96			cc += 8;
  97		}
  98		bb += 8;
  99		A += Askip + 1;
 100		C += 8;
 101	}
 102}
 103
 104static void
 105MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
 106{
 107	int i, j;
 108	dReal sum, *aa, *ad, *bb, *cc;
 109	dIASSERT (p > 0 && A && B && C);
 110	bb = B;
 111	for (i = 0; i < p; i++)
 112	{
 113		//aa is going accross the matrix, ad down
 114		aa = ad = A;
 115		cc = C;
 116		for (j = i; j < p; j++)
 117		{
 118			sum = bb[0] * cc[0];
 119			sum += bb[1] * cc[1];
 120			sum += bb[2] * cc[2];
 121			sum += bb[4] * cc[4];
 122			sum += bb[5] * cc[5];
 123			sum += bb[6] * cc[6];
 124			*(aa++) += sum;
 125			*ad += sum;
 126			ad += Askip;
 127			cc += 8;
 128		}
 129		bb += 8;
 130		A += Askip + 1;
 131		C += 8;
 132	}
 133}
 134
 135
 136// this assumes the 4th and 8th rows of B are zero.
 137
 138static void
 139Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p)
 140{
 141	int i;
 142	dIASSERT (p > 0 && A && B && C);
 143	dReal sum;
 144	for (i = p; i; i--)
 145	{
 146		sum = B[0] * C[0];
 147		sum += B[1] * C[1];
 148		sum += B[2] * C[2];
 149		sum += B[4] * C[4];
 150		sum += B[5] * C[5];
 151		sum += B[6] * C[6];
 152		*(A++) = sum;
 153		B += 8;
 154	}
 155}
 156
 157
 158// this assumes the 4th and 8th rows of B are zero.
 159
 160static void
 161MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p)
 162{
 163	int i;
 164	dIASSERT (p > 0 && A && B && C);
 165	dReal sum;
 166	for (i = p; i; i--)
 167	{
 168		sum = B[0] * C[0];
 169		sum += B[1] * C[1];
 170		sum += B[2] * C[2];
 171		sum += B[4] * C[4];
 172		sum += B[5] * C[5];
 173		sum += B[6] * C[6];
 174		*(A++) += sum;
 175		B += 8;
 176	}
 177}
 178
 179
 180// this assumes the 4th and 8th rows of B are zero.
 181
 182static void
 183Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q)
 184{
 185	int k;
 186	dReal sum;
 187	dIASSERT (q > 0 && A && B && C);
 188	sum = 0;
 189	for (k = 0; k < q; k++)
 190		sum += B[k * 8] * C[k];
 191	A[0] = sum;
 192	sum = 0;
 193	for (k = 0; k < q; k++)
 194		sum += B[1 + k * 8] * C[k];
 195	A[1] = sum;
 196	sum = 0;
 197	for (k = 0; k < q; k++)
 198		sum += B[2 + k * 8] * C[k];
 199	A[2] = sum;
 200	sum = 0;
 201	for (k = 0; k < q; k++)
 202		sum += B[4 + k * 8] * C[k];
 203	A[4] = sum;
 204	sum = 0;
 205	for (k = 0; k < q; k++)
 206		sum += B[5 + k * 8] * C[k];
 207	A[5] = sum;
 208	sum = 0;
 209	for (k = 0; k < q; k++)
 210		sum += B[6 + k * 8] * C[k];
 211	A[6] = sum;
 212}
 213
 214//****************************************************************************
 215// body rotation
 216
 217// return sin(x)/x. this has a singularity at 0 so special handling is needed
 218// for small arguments.
 219
 220static inline dReal
 221sinc (dReal x)
 222{
 223	// if |x| < 1e-4 then use a taylor series expansion. this two term expansion
 224	// is actually accurate to one LS bit within this range if double precision
 225	// is being used - so don't worry!
 226	if (dFabs (x) < 1.0e-4)
 227		return REAL (1.0) - x * x * REAL (0.166666666666666666667);
 228	else
 229		return dSin (x) / x;
 230}
 231
 232#if 0 // this is just dxStepBody()
 233// given a body b, apply its linear and angular rotation over the time
 234// interval h, thereby adjusting its position and orientation.
 235
 236static inline void
 237moveAndRotateBody (dxBody * b, dReal h)
 238{
 239	int j;
 240
 241	// handle linear velocity
 242	for (j = 0; j < 3; j++)
 243		b->posr.pos[j] += h * b->lvel[j];
 244
 245	if (b->flags & dxBodyFlagFiniteRotation)
 246	{
 247		dVector3 irv;			// infitesimal rotation vector
 248		dQuaternion q;			// quaternion for finite rotation
 249
 250		if (b->flags & dxBodyFlagFiniteRotationAxis)
 251		{
 252			// split the angular velocity vector into a component along the finite
 253			// rotation axis, and a component orthogonal to it.
 254			dVector3 frv, irv;	// finite rotation vector
 255			dReal k = dDOT (b->finite_rot_axis, b->avel);
 256			frv[0] = b->finite_rot_axis[0] * k;
 257			frv[1] = b->finite_rot_axis[1] * k;
 258			frv[2] = b->finite_rot_axis[2] * k;
 259			irv[0] = b->avel[0] - frv[0];
 260			irv[1] = b->avel[1] - frv[1];
 261			irv[2] = b->avel[2] - frv[2];
 262
 263			// make a rotation quaternion q that corresponds to frv * h.
 264			// compare this with the full-finite-rotation case below.
 265			h *= REAL (0.5);
 266			dReal theta = k * h;
 267			q[0] = dCos (theta);
 268			dReal s = sinc (theta) * h;
 269			q[1] = frv[0] * s;
 270			q[2] = frv[1] * s;
 271			q[3] = frv[2] * s;
 272		}
 273		else
 274		{
 275			// make a rotation quaternion q that corresponds to w * h
 276			dReal wlen = dSqrt (b->avel[0] * b->avel[0] + b->avel[1] * b->avel[1] + b->avel[2] * b->avel[2]);
 277			h *= REAL (0.5);
 278			dReal theta = wlen * h;
 279			q[0] = dCos (theta);
 280			dReal s = sinc (theta) * h;
 281			q[1] = b->avel[0] * s;
 282			q[2] = b->avel[1] * s;
 283			q[3] = b->avel[2] * s;
 284		}
 285
 286		// do the finite rotation
 287		dQuaternion q2;
 288		dQMultiply0 (q2, q, b->q);
 289		for (j = 0; j < 4; j++)
 290			b->q[j] = q2[j];
 291
 292		// do the infitesimal rotation if required
 293		if (b->flags & dxBodyFlagFiniteRotationAxis)
 294		{
 295			dReal dq[4];
 296			dWtoDQ (irv, b->q, dq);
 297			for (j = 0; j < 4; j++)
 298				b->q[j] += h * dq[j];
 299		}
 300	}
 301	else
 302	{
 303		// the normal way - do an infitesimal rotation
 304		dReal dq[4];
 305		dWtoDQ (b->avel, b->q, dq);
 306		for (j = 0; j < 4; j++)
 307			b->q[j] += h * dq[j];
 308	}
 309
 310	// normalize the quaternion and convert it to a rotation matrix
 311	dNormalize4 (b->q);
 312	dQtoR (b->q, b->posr.R);
 313
 314	// notify all attached geoms that this body has moved
 315	for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom))
 316		dGeomMoved (geom);
 317}
 318#endif
 319
 320//****************************************************************************
 321//This is an implementation of the iterated/relaxation algorithm.
 322//Here is a quick overview of the algorithm per Sergi Valverde's posts to the
 323//mailing list:
 324//
 325//  for i=0..N-1 do
 326//      for c = 0..C-1 do
 327//          Solve constraint c-th
 328//          Apply forces to constraint bodies
 329//      next
 330//  next
 331//  Integrate bodies
 332
 333void
 334dInternalStepFast (dxWorld * world, dxBody * body[2], dReal * GI[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize)
 335{
 336	int i, j, k;
 337# ifdef TIMING
 338	dTimerNow ("constraint preprocessing");
 339# endif
 340
 341	dReal stepsize1 = dRecip (stepsize);
 342
 343	int m = info.m;
 344	// nothing to do if no constraints.
 345	if (m <= 0)
 346		return;
 347
 348	int nub = 0;
 349	if (info.nub == info.m)
 350		nub = m;
 351
 352	// compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
 353	// format as J so we just go through the constraints in J multiplying by
 354	// the appropriate scalars and matrices.
 355#   ifdef TIMING
 356	dTimerNow ("compute A");
 357#   endif
 358	dReal JinvM[2 * 6 * 8];
 359	//dSetZero (JinvM, 2 * m * 8);
 360
 361	dReal *Jsrc = Jinfo.J1l;
 362	dReal *Jdst = JinvM;
 363	if (body[0])
 364	{
 365		for (j = m - 1; j >= 0; j--)
 366		{
 367			for (k = 0; k < 3; k++)
 368				Jdst[k] = Jsrc[k] * body[0]->invMass;
 369			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]);
 370			Jsrc += 8;
 371			Jdst += 8;
 372		}
 373	}
 374	if (body[1])
 375	{
 376		Jsrc = Jinfo.J2l;
 377		Jdst = JinvM + 8 * m;
 378		for (j = m - 1; j >= 0; j--)
 379		{
 380			for (k = 0; k < 3; k++)
 381				Jdst[k] = Jsrc[k] * body[1]->invMass;
 382			dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]);
 383			Jsrc += 8;
 384			Jdst += 8;
 385		}
 386	}
 387
 388
 389	// now compute A = JinvM * J'.
 390	int mskip = dPAD (m);
 391	dReal A[6 * 8];
 392	//dSetZero (A, 6 * 8);
 393
 394	if (body[0]) {
 395		Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip);
 396		if (body[1])
 397			MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
 398                                              m, mskip);
 399	} else {
 400		if (body[1])
 401			Multiply2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
 402                                           m, mskip);
 403	}
 404
 405	// add cfm to the diagonal of A
 406	for (i = 0; i < m; i++)
 407		A[i * mskip + i] += Jinfo.cfm[i] * stepsize1;
 408
 409	// compute the right hand side `rhs'
 410#   ifdef TIMING
 411	dTimerNow ("compute rhs");
 412#   endif
 413	dReal tmp1[16];
 414	//dSetZero (tmp1, 16);
 415	// put v/h + invM*fe into tmp1
 416	for (i = 0; i < 2; i++)
 417	{
 418		if (!body[i])
 419			continue;
 420		for (j = 0; j < 3; j++)
 421			tmp1[i * 8 + j] = body[i]->facc[j] * body[i]->invMass + body[i]->lvel[j] * stepsize1;
 422		dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc);
 423		for (j = 0; j < 3; j++)
 424			tmp1[i * 8 + 4 + j] += body[i]->avel[j] * stepsize1;
 425	}
 426	// put J*tmp1 into rhs
 427	dReal rhs[6];
 428	//dSetZero (rhs, 6);
 429
 430	if (body[0]) {
 431		Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m);
 432		if (body[1])
 433			MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
 434	} else {
 435		if (body[1])
 436			Multiply0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
 437	}
 438
 439	// complete rhs
 440	for (i = 0; i < m; i++)
 441		rhs[i] = Jinfo.c[i] * stepsize1 - rhs[i];
 442
 443#ifdef SLOW_LCP
 444	// solve the LCP problem and get lambda.
 445	// this will destroy A but that's okay
 446#	ifdef TIMING
 447	dTimerNow ("solving LCP problem");
 448#	endif
 449	dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal));
 450	dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal));
 451	dReal lo[6], hi[6];
 452	memcpy (lo, Jinfo.lo, m * sizeof (dReal));
 453	memcpy (hi, Jinfo.hi, m * sizeof (dReal));
 454	dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex);
 455#endif
 456
 457	// LCP Solver replacement:
 458	// This algorithm goes like this:
 459	// Do a straightforward LDLT factorization of the matrix A, solving for
 460	// A*x = rhs
 461	// For each x[i] that is outside of the bounds of lo[i] and hi[i],
 462	//    clamp x[i] into that range.
 463	//    Substitute into A the now known x's
 464	//    subtract the residual away from the rhs.
 465	//    Remove row and column i from L, updating the factorization
 466	//    place the known x's at the end of the array, keeping up with location in p
 467	// Repeat until all constraints have been clamped or all are within bounds
 468	//
 469	// This is probably only faster in the single joint case where only one repeat is
 470	// the norm.
 471
 472#ifdef FAST_FACTOR
 473	// factorize A (L*D*L'=A)
 474#	ifdef TIMING
 475	dTimerNow ("factorize A");
 476#	endif
 477	dReal d[6];
 478	dReal L[6 * 8];
 479	memcpy (L, A, m * mskip * sizeof (dReal));
 480	dFactorLDLT (L, d, m, mskip);
 481
 482	// compute lambda
 483#	ifdef TIMING
 484	dTimerNow ("compute lambda");
 485#	endif
 486
 487	int left = m;				//constraints left to solve.
 488	int remove[6];
 489	dReal lambda[6];
 490	dReal x[6];
 491	int p[6];
 492	for (i = 0; i < 6; i++)
 493		p[i] = i;
 494	while (true)
 495	{
 496		memcpy (x, rhs, left * sizeof (dReal));
 497		dSolveLDLT (L, d, x, left, mskip);
 498
 499		int fixed = 0;
 500		for (i = 0; i < left; i++)
 501		{
 502			j = p[i];
 503			remove[i] = false;
 504			// This isn't the exact same use of findex as dSolveLCP.... since x[findex]
 505			// may change after I've already clamped x[i], but it should be close
 506			if (Jinfo.findex[j] > -1)
 507			{
 508				dReal f = fabs (Jinfo.hi[j] * x[p[Jinfo.findex[j]]]);
 509				if (x[i] > f)
 510					x[i] = f;
 511				else if (x[i] < -f)
 512					x[i] = -f;
 513				else
 514					continue;
 515			}
 516			else
 517			{
 518				if (x[i] > Jinfo.hi[j])
 519					x[i] = Jinfo.hi[j];
 520				else if (x[i] < Jinfo.lo[j])
 521					x[i] = Jinfo.lo[j];
 522				else
 523					continue;
 524			}
 525			remove[i] = true;
 526			fixed++;
 527		}
 528		if (fixed == 0 || fixed == left)	//no change or all constraints solved
 529			break;
 530
 531		for (i = 0; i < left; i++)	//sub in to right hand side.
 532			if (remove[i])
 533				for (j = 0; j < left; j++)
 534					if (!remove[j])
 535						rhs[j] -= A[j * mskip + i] * x[i];
 536
 537		for (int r = left - 1; r >= 0; r--)	//eliminate row/col for fixed variables
 538		{
 539			if (remove[r])
 540			{
 541				//dRemoveLDLT adapted for use without row pointers.
 542				if (r == left - 1)
 543				{
 544					left--;
 545					continue;	// deleting last row/col is easy
 546				}
 547				else if (r == 0)
 548				{
 549					dReal a[6];
 550					for (i = 0; i < left; i++)
 551						a[i] = -A[i * mskip];
 552					a[0] += REAL (1.0);
 553					dLDLTAddTL (L, d, a, left, mskip);
 554				}
 555				else
 556				{
 557					dReal t[6];
 558					dReal a[6];
 559					for (i = 0; i < r; i++)
 560						t[i] = L[r * mskip + i] / d[i];
 561					for (i = 0; i < left - r; i++)
 562						a[i] = dDot (L + (r + i) * mskip, t, r) - A[(r + i) * mskip + r];
 563					a[0] += REAL (1.0);
 564					dLDLTAddTL (L + r * mskip + r, d + r, a, left - r, mskip);
 565				}
 566
 567				dRemoveRowCol (L, left, mskip, r);
 568				//end dRemoveLDLT
 569
 570				left--;
 571				if (r < (left - 1))
 572				{
 573					dReal tx = x[r];
 574					memmove (d + r, d + r + 1, (left - r) * sizeof (dReal));
 575					memmove (rhs + r, rhs + r + 1, (left - r) * sizeof (dReal));
 576					//x will get written over by rhs anyway, no need to move it around
 577					//just store the fixed value we just discovered in it.
 578					x[left] = tx;
 579					for (i = 0; i < m; i++)
 580						if (p[i] > r && p[i] <= left)
 581							p[i]--;
 582					p[r] = left;
 583				}
 584			}
 585		}
 586	}
 587
 588	for (i = 0; i < m; i++)
 589		lambda[i] = x[p[i]];
 590#	endif
 591	// compute the constraint force `cforce'
 592#	ifdef TIMING
 593	dTimerNow ("compute constraint force");
 594#endif
 595
 596	// compute cforce = J'*lambda
 597	dJointFeedback *fb = joint->feedback;
 598	dReal cforce[16];
 599	//dSetZero (cforce, 16);
 600
 601	if (fb)
 602	{
 603		// the user has requested feedback on the amount of force that this
 604		// joint is applying to the bodies. we use a slightly slower
 605		// computation that splits out the force components and puts them
 606		// in the feedback structure.
 607		dReal data1[8], data2[8];
 608		if (body[0])
 609		{
 610			Multiply1_8q1 (data1, Jinfo.J1l, lambda, m);
 611			dReal *cf1 = cforce;
 612			cf1[0] = (fb->f1[0] = data1[0]);
 613			cf1[1] = (fb->f1[1] = data1[1]);
 614			cf1[2] = (fb->f1[2] = data1[2]);
 615			cf1[4] = (fb->t1[0] = data1[4]);
 616			cf1[5] = (fb->t1[1] = data1[5]);
 617			cf1[6] = (fb->t1[2] = data1[6]);
 618		}
 619		if (body[1])
 620		{
 621			Multiply1_8q1 (data2, Jinfo.J2l, lambda, m);
 622			dReal *cf2 = cforce + 8;
 623			cf2[0] = (fb->f2[0] = data2[0]);
 624			cf2[1] = (fb->f2[1] = data2[1]);
 625			cf2[2] = (fb->f2[2] = data2[2]);
 626			cf2[4] = (fb->t2[0] = data2[4]);
 627			cf2[5] = (fb->t2[1] = data2[5]);
 628			cf2[6] = (fb->t2[2] = data2[6]);
 629		}
 630	}
 631	else
 632	{
 633		// no feedback is required, let's compute cforce the faster way
 634		if (body[0])
 635			Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m);
 636		if (body[1])
 637			Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m);
 638	}
 639
 640	for (i = 0; i < 2; i++)
 641	{
 642		if (!body[i])
 643			continue;
 644		for (j = 0; j < 3; j++)
 645		{
 646			body[i]->facc[j] += cforce[i * 8 + j];
 647			body[i]->tacc[j] += cforce[i * 8 + 4 + j];
 648		}
 649	}
 650}
 651
 652void
 653dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations)
 654{
 655#   ifdef TIMING
 656	dTimerNow ("preprocessing");
 657#   endif
 658	dxBody *bodyPair[2], *body;
 659	dReal *GIPair[2], *GinvIPair[2];
 660	dxJoint *joint;
 661	int iter, b, j, i;
 662	dReal ministep = stepsize / maxiterations;
 663
 664	// make a local copy of the joint array, because we might want to modify it.
 665	// (the "dxJoint *const*" declaration says we're allowed to modify the joints
 666	// but not the joint array, because the caller might need it unchanged).
 667	dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *));
 668	memcpy (joints, _joints, nj * sizeof (dxJoint *));
 669
 670	// get m = total constraint dimension, nub = number of unbounded variables.
 671	// create constraint offset array and number-of-rows array for all joints.
 672	// the constraints are re-ordered as follows: the purely unbounded
 673	// constraints, the mixed unbounded + LCP constraints, and last the purely
 674	// LCP constraints. this assists the LCP solver to put all unbounded
 675	// variables at the start for a quick factorization.
 676	//
 677	// joints with m=0 are inactive and are removed from the joints array
 678	// entirely, so that the code that follows does not consider them.
 679	// also number all active joints in the joint list (set their tag values).
 680	// inactive joints receive a tag value of -1.
 681
 682	int m = 0;
 683	dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1));
 684	int *ofs = (int *) ALLOCA (nj * sizeof (int));
 685	for (i = 0, j = 0; j < nj; j++)
 686	{	// i=dest, j=src
 687		joints[j]->getInfo1 (info + i);
 688		dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
 689		if (info[i].m > 0)
 690		{
 691			joints[i] = joints[j];
 692			joints[i]->tag = i;
 693			i++;
 694		}
 695		else
 696		{
 697			joints[j]->tag = -1;
 698		}
 699	}
 700	nj = i;
 701
 702	// the purely unbounded constraints
 703	for (i = 0; i < nj; i++)
 704	{
 705		ofs[i] = m;
 706		m += info[i].m;
 707	}
 708	dReal *c = NULL;
 709	dReal *cfm = NULL;
 710	dReal *lo = NULL;
 711	dReal *hi = NULL;
 712	int *findex = NULL;
 713
 714	dReal *J = NULL;
 715	dxJoint::Info2 * Jinfo = NULL;
 716
 717	if (m)
 718	{
 719	// create a constraint equation right hand side vector `c', a constraint
 720	// force mixing vector `cfm', and LCP low and high bound vectors, and an
 721	// 'findex' vector.
 722		c = (dReal *) ALLOCA (m * sizeof (dReal));
 723		cfm = (dReal *) ALLOCA (m * sizeof (dReal));
 724		lo = (dReal *) ALLOCA (m * sizeof (dReal));
 725		hi = (dReal *) ALLOCA (m * sizeof (dReal));
 726		findex = (int *) ALLOCA (m * sizeof (int));
 727	dSetZero (c, m);
 728	dSetValue (cfm, m, world->global_cfm);
 729	dSetValue (lo, m, -dInfinity);
 730	dSetValue (hi, m, dInfinity);
 731	for (i = 0; i < m; i++)
 732		findex[i] = -1;
 733
 734	// get jacobian data from constraints. a (2*m)x8 matrix will be created
 735	// to store the two jacobian blocks from each constraint. it has this
 736	// format:
 737	//
 738	//   l l l 0 a a a 0  \    .
 739	//   l l l 0 a a a 0   }-- jacobian body 1 block for joint 0 (3 rows)
 740	//   l l l 0 a a a 0  /
 741	//   l l l 0 a a a 0  \    .
 742	//   l l l 0 a a a 0   }-- jacobian body 2 block for joint 0 (3 rows)
 743	//   l l l 0 a a a 0  /
 744	//   l l l 0 a a a 0  }--- jacobian body 1 block for joint 1 (1 row)
 745	//   l l l 0 a a a 0  }--- jacobian body 2 block for joint 1 (1 row)
 746	//   etc...
 747	//
 748	//   (lll) = linear jacobian data
 749	//   (aaa) = angular jacobian data
 750	//
 751#   ifdef TIMING
 752	dTimerNow ("create J");
 753#   endif
 754		J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal));
 755		dSetZero (J, 2 * m * 8);
 756		Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2));
 757	for (i = 0; i < nj; i++)
 758	{
 759		Jinfo[i].rowskip = 8;
 760		Jinfo[i].fps = dRecip (stepsize);
 761		Jinfo[i].erp = world->global_erp;
 762		Jinfo[i].J1l = J + 2 * 8 * ofs[i];
 763		Jinfo[i].J1a = Jinfo[i].J1l + 4;
 764		Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m;
 765		Jinfo[i].J2a = Jinfo[i].J2l + 4;
 766		Jinfo[i].c = c + ofs[i];
 767		Jinfo[i].cfm = cfm + ofs[i];
 768		Jinfo[i].lo = lo + ofs[i];
 769		Jinfo[i].hi = hi + ofs[i];
 770		Jinfo[i].findex = findex + ofs[i];
 771		//joints[i]->getInfo2 (Jinfo+i);
 772	}
 773
 774	}
 775
 776	dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
 777	dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
 778	dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
 779	dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
 780	for (b = 0; b < nb; b++)
 781	{
 782		for (i = 0; i < 4; i++)
 783		{
 784			saveFacc[b * 4 + i] = bodies[b]->facc[i];
 785			saveTacc[b * 4 + i] = bodies[b]->tacc[i];
 786		}
 787                bodies[b]->tag = b;
 788	}
 789
 790	for (iter = 0; iter < maxiterations; iter++)
 791	{
 792#	ifdef TIMING
 793		dTimerNow ("applying inertia and gravity");
 794#	endif
 795		dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 796
 797		for (b = 0; b < nb; b++)
 798		{
 799			body = bodies[b];
 800
 801			// for all bodies, compute the inertia tensor and its inverse in the global
 802			// frame, and compute the rotational force and add it to the torque
 803			// accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
 804			// @@@ check computation of rotational force.
 805
 806			// compute inertia tensor in global frame
 807			dMULTIPLY2_333 (tmp, body->mass.I, body->posr.R);
 808			dMULTIPLY0_333 (globalI + b * 12, body->posr.R, tmp);
 809			// compute inverse inertia tensor in global frame
 810			dMULTIPLY2_333 (tmp, body->invI, body->posr.R);
 811			dMULTIPLY0_333 (globalInvI + b * 12, body->posr.R, tmp);
 812
 813			for (i = 0; i < 4; i++)
 814				body->tacc[i] = saveTacc[b * 4 + i];
 815                
 816            if (body->flags & dxBodyGyroscopic) {
 817                // DanielKO: this doesn't look right/efficient, but anyways...
 818    			// compute rotational force
 819    			dMULTIPLY0_331 (tmp, globalI + b * 12, body->avel);
 820        		dCROSS (body->tacc, -=, body->avel, tmp);
 821            }
 822
 823			// add the gravity force to all bodies
 824			if ((body->flags & dxBodyNoGravity) == 0)
 825			{
 826				body->facc[0] = saveFacc[b * 4 + 0] + body->mass.mass * world->gravity[0];
 827				body->facc[1] = saveFacc[b * 4 + 1] + body->mass.mass * world->gravity[1];
 828				body->facc[2] = saveFacc[b * 4 + 2] + body->mass.mass * world->gravity[2];
 829				body->facc[3] = 0;
 830			} else {
 831                                body->facc[0] = saveFacc[b * 4 + 0];
 832                                body->facc[1] = saveFacc[b * 4 + 1];
 833                                body->facc[2] = saveFacc[b * 4 + 2];
 834				body->facc[3] = 0;
 835                        }
 836
 837		}
 838
 839#ifdef RANDOM_JOINT_ORDER
 840#ifdef TIMING
 841		dTimerNow ("randomizing joint order");
 842#endif
 843		//randomize the order of the joints by looping through the array
 844		//and swapping the current joint pointer with a random one before it.
 845		for (j = 0; j < nj; j++)
 846		{
 847			joint = joints[j];
 848			dxJoint::Info1 i1 = info[j];
 849			dxJoint::Info2 i2 = Jinfo[j];
 850                        const int r = dRandInt(j+1);
 851			dIASSERT (r < nj);
 852			joints[j] = joints[r];
 853			info[j] = info[r];
 854			Jinfo[j] = Jinfo[r];
 855			joints[r] = joint;
 856			info[r] = i1;
 857			Jinfo[r] = i2;
 858		}
 859#endif
 860
 861		//now iterate through the random ordered joint array we created.
 862		for (j = 0; j < nj; j++)
 863		{
 864#ifdef TIMING
 865			dTimerNow ("setting up joint");
 866#endif
 867			joint = joints[j];
 868			bodyPair[0] = joint->node[0].body;
 869			bodyPair[1] = joint->node[1].body;
 870
 871			if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled))
 872				bodyPair[0] = 0;
 873			if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled))
 874				bodyPair[1] = 0;
 875			
 876			//if this joint is not connected to any enabled bodies, skip it.
 877			if (!bodyPair[0] && !bodyPair[1])
 878				continue;
 879			
 880			if (bodyPair[0])
 881			{
 882				GIPair[0] = globalI + bodyPair[0]->tag * 12;
 883				GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12;
 884			}
 885			if (bodyPair[1])
 886			{
 887				GIPair[1] = globalI + bodyPair[1]->tag * 12;
 888				GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12;
 889			}
 890
 891			joints[j]->getInfo2 (Jinfo + j);
 892
 893			//dInternalStepIslandFast is an exact copy of the old routine with one
 894			//modification: the calculated forces are added back to the facc and tacc
 895			//vectors instead of applying them to the bodies and moving them.
 896			if (info[j].m > 0)
 897			{
 898			dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep);
 899			}		
 900		}
 901		//  }
 902#	ifdef TIMING
 903		dTimerNow ("moving bodies");
 904#	endif
 905		//Now we can simulate all the free floating bodies, and move them.
 906		for (b = 0; b < nb; b++)
 907		{
 908			body = bodies[b];
 909
 910			for (i = 0; i < 4; i++)
 911			{
 912				body->facc[i] *= ministep;
 913				body->tacc[i] *= ministep;
 914			}
 915
 916			//apply torque
 917			dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc);
 918
 919			//apply force
 920			for (i = 0; i < 3; i++)
 921				body->lvel[i] += body->invMass * body->facc[i];
 922
 923			//move It!
 924			dxStepBody (body, ministep);
 925		}
 926	}
 927	for (b = 0; b < nb; b++)
 928		for (j = 0; j < 4; j++)
 929			bodies[b]->facc[j] = bodies[b]->tacc[j] = 0;
 930}
 931
 932
 933#ifdef NO_ISLANDS
 934
 935// Since the iterative algorithm doesn't care about islands of bodies, this is a
 936// faster algorithm that just sends it all the joints and bodies in one array.
 937// It's downfall is it's inability to handle disabled bodies as well as the old one.
 938static void
 939processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
 940{
 941	// nothing to do if no bodies
 942	if (world->nb <= 0)
 943		return;
 944
 945	dInternalHandleAutoDisabling (world,stepsize);
 946
 947#	ifdef TIMING
 948	dTimerStart ("creating joint and body arrays");
 949#	endif
 950	dxBody **bodies, *body;
 951	dxJoint **joints, *joint;
 952	joints = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
 953	bodies = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
 954
 955	int nj = 0;
 956	for (joint = world->firstjoint; joint; joint = (dxJoint *) joint->next)
 957		joints[nj++] = joint;
 958
 959	int nb = 0;
 960	for (body = world->firstbody; body; body = (dxBody *) body->next)
 961		bodies[nb++] = body;
 962
 963	dInternalStepIslandFast (world, bodies, nb, joints, nj, stepsize, maxiterations);
 964#	ifdef TIMING
 965	dTimerEnd ();
 966	dTimerReport (stdout, 1);
 967#	endif
 968}
 969
 970#else
 971
 972//****************************************************************************
 973// island processing
 974
 975// this groups all joints and bodies in a world into islands. all objects
 976// in an island are reachable by going through connected bodies and joints.
 977// each island can be simulated separately.
 978// note that joints that are not attached to anything will not be included
 979// in any island, an so they do not affect the simulation.
 980//
 981// this function starts new island from unvisited bodies. however, it will
 982// never start a new islands from a disabled body. thus islands of disabled
 983// bodies will not be included in the simulation. disabled bodies are
 984// re-enabled if they are found to be part of an active island.
 985
 986static void
 987processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
 988{
 989#ifdef TIMING
 990	dTimerStart ("Island Setup");
 991#endif
 992	dxBody *b, *bb, **body;
 993	dxJoint *j, **joint;
 994
 995	// nothing to do if no bodies
 996	if (world->nb <= 0)
 997		return;
 998
 999	dInternalHandleAutoDisabling (world,stepsize);
1000
1001	// make arrays for body and joint lists (for a single island) to go into
1002	body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
1003	joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
1004	int bcount = 0;				// number of bodies in `body'
1005	int jcount = 0;				// number of joints in `joint'
1006	int tbcount = 0;
1007	int tjcount = 0;
1008	
1009	// set all body/joint tags to 0
1010	for (b = world->firstbody; b; b = (dxBody *) b->next)
1011		b->tag = 0;
1012	for (j = world->firstjoint; j; j = (dxJoint *) j->next)
1013		j->tag = 0;
1014
1015	// allocate a stack of unvisited bodies in the island. the maximum size of
1016	// the stack can be the lesser of the number of bodies or joints, because
1017	// new bodies are only ever added to the stack by going through untagged
1018	// joints. all the bodies in the stack must be tagged!
1019	int stackalloc = (world->nj < world->nb) ? world->nj : world->nb;
1020	dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *));
1021	int *autostack = (int *) ALLOCA (stackalloc * sizeof (int));
1022
1023	for (bb = world->firstbody; bb; bb = (dxBody *) bb->next)
1024	{
1025#ifdef TIMING
1026		dTimerNow ("Island Processing");
1027#endif
1028		// get bb = the next enabled, untagged body, and tag it
1029		if (bb->tag || (bb->flags & dxBodyDisabled) || (bb->invMass == 0))
1030			continue;
1031		bb->tag = 1;
1032
1033		// tag all bodies and joints starting from bb.
1034		int stacksize = 0;
1035		int autoDepth = autoEnableDepth;
1036		b = bb;
1037		body[0] = bb;
1038		bcount = 1;
1039		jcount = 0;
1040		goto quickstart;
1041		while (stacksize > 0)
1042		{
1043			b = stack[--stacksize];	// pop body off stack
1044			autoDepth = autostack[stacksize];
1045			body[bcount++] = b;	// put body on body list
1046		  quickstart:
1047
1048			// traverse and tag all body's joints, add untagged connected bodies
1049			// to stack
1050			for (dxJointNode * n = b->firstjoint; n; n = n->next)
1051			{
1052				if (!n->joint->tag)
1053				{
1054					int thisDepth = autoEnableDepth;
1055					n->joint->tag = 1;
1056					joint[jcount++] = n->joint;
1057					if (n->body && !n->body->tag)
1058					{
1059						if (n->body->flags & dxBodyDisabled)
1060							thisDepth = autoDepth - 1;
1061						if (thisDepth < 0)
1062							continue;
1063						n->body->flags &= ~dxBodyDisabled;
1064						n->body->tag = 1;
1065						autostack[stacksize] = thisDepth;
1066						stack[stacksize++] = n->body;
1067					}
1068				}
1069			}
1070			dIASSERT (stacksize <= world->nb);
1071			dIASSERT (stacksize <= world->nj);
1072		}
1073
1074		// now do something with body and joint lists
1075		dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations);
1076
1077		// what we've just done may have altered the body/joint tag values.
1078		// we must make sure that these tags are nonzero.
1079		// also make sure all bodies are in the enabled state.
1080		int i;
1081		for (i = 0; i < bcount; i++)
1082		{
1083			body[i]->tag = 1;
1084			body[i]->flags &= ~dxBodyDisabled;
1085		}
1086		for (i = 0; i < jcount; i++)
1087			joint[i]->tag = 1;
1088		
1089		tbcount += bcount;
1090		tjcount += jcount;
1091	}
1092	
1093#ifdef TIMING
1094	dMessage(0, "Total joints processed: %i, bodies: %i", tjcount, tbcount);
1095#endif
1096
1097	// if debugging, check that all objects (except for disabled bodies,
1098	// unconnected joints, and joints that are connected to disabled bodies)
1099	// were tagged.
1100# ifndef dNODEBUG
1101	for (b = world->firstbody; b; b = (dxBody *) b->next)
1102	{
1103		if (b->flags & dxBodyDisabled)
1104		{
1105			if (b->tag)
1106				dDebug (0, "disabled body tagged");
1107		}
1108		else
1109		{
1110			if (!b->tag)
1111				dDebug (0, "enabled body not tagged");
1112		}
1113	}
1114	for (j = world->firstjoint; j; j = (dxJoint *) j->next)
1115	{
1116		if ((j->node[0].body && (j->node[0].body->flags & dxBodyDisabled) == 0) || (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled) == 0))
1117		{
1118			if (!j->tag)
1119				dDebug (0, "attached enabled joint not tagged");
1120		}
1121		else
1122		{
1123			if (j->tag)
1124				dDebug (0, "unattached or disabled joint tagged");
1125		}
1126	}
1127# endif
1128
1129#	ifdef TIMING
1130	dTimerEnd ();
1131	dTimerReport (stdout, 1);
1132#	endif
1133}
1134
1135#endif
1136
1137
1138void dWorldStepFast1 (dWorldID w, dReal stepsize, int maxiterations)
1139{
1140	dUASSERT (w, "bad world argument");
1141	dUASSERT (stepsize > 0, "stepsize must be > 0");
1142	processIslandsFast (w, stepsize, maxiterations);
1143}