/lib/ode/ode_source/ode/src/stepfast.cpp

http://narutortsproject.googlecode.com/ · C++ · 1143 lines · 816 code · 120 blank · 207 comment · 168 complexity · 21e3dd5c2709ef17758a92cf6579b3b1 MD5 · raw file

  1. /*************************************************************************
  2. * *
  3. * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
  4. * All rights reserved. Email: russ@q12.org Web: www.q12.org *
  5. * *
  6. * Fast iterative solver, David Whittaker. Email: david@csworkbench.com *
  7. * *
  8. * This library is free software; you can redistribute it and/or *
  9. * modify it under the terms of EITHER: *
  10. * (1) The GNU Lesser General Public License as published by the Free *
  11. * Software Foundation; either version 2.1 of the License, or (at *
  12. * your option) any later version. The text of the GNU Lesser *
  13. * General Public License is included with this library in the *
  14. * file LICENSE.TXT. *
  15. * (2) The BSD-style license that is included with this library in *
  16. * the file LICENSE-BSD.TXT. *
  17. * *
  18. * This library is distributed in the hope that it will be useful, *
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
  21. * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
  22. * *
  23. *************************************************************************/
  24. // This is the StepFast code by David Whittaker. This code is faster, but
  25. // sometimes less stable than, the original "big matrix" code.
  26. // Refer to the user's manual for more information.
  27. // Note that this source file duplicates a lot of stuff from step.cpp,
  28. // eventually we should move the common code to a third file.
  29. #include "objects.h"
  30. #include "joints/joint.h"
  31. #include <ode/odeconfig.h>
  32. #include "config.h"
  33. #include <ode/objects.h>
  34. #include <ode/odemath.h>
  35. #include <ode/rotation.h>
  36. #include <ode/timer.h>
  37. #include <ode/error.h>
  38. #include <ode/matrix.h>
  39. #include <ode/misc.h>
  40. #include "lcp.h"
  41. #include "step.h"
  42. #include "util.h"
  43. // misc defines
  44. #define ALLOCA dALLOCA16
  45. #define RANDOM_JOINT_ORDER
  46. //#define FAST_FACTOR //use a factorization approximation to the LCP solver (fast, theoretically less accurate)
  47. #define SLOW_LCP //use the old LCP solver
  48. //#define NO_ISLANDS //does not perform island creation code (3~4% of simulation time), body disabling doesn't work
  49. //#define TIMING
  50. static int autoEnableDepth = 2;
  51. void dWorldSetAutoEnableDepthSF1 (dxWorld *world, int autodepth)
  52. {
  53. if (autodepth > 0)
  54. autoEnableDepth = autodepth;
  55. else
  56. autoEnableDepth = 0;
  57. }
  58. int dWorldGetAutoEnableDepthSF1 (dxWorld *world)
  59. {
  60. return autoEnableDepth;
  61. }
  62. //little bit of math.... the _sym_ functions assume the return matrix will be symmetric
  63. static void
  64. Multiply2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
  65. {
  66. int i, j;
  67. dReal sum, *aa, *ad, *bb, *cc;
  68. dIASSERT (p > 0 && A && B && C);
  69. bb = B;
  70. for (i = 0; i < p; i++)
  71. {
  72. //aa is going accross the matrix, ad down
  73. aa = ad = A;
  74. cc = C;
  75. for (j = i; j < p; j++)
  76. {
  77. sum = bb[0] * cc[0];
  78. sum += bb[1] * cc[1];
  79. sum += bb[2] * cc[2];
  80. sum += bb[4] * cc[4];
  81. sum += bb[5] * cc[5];
  82. sum += bb[6] * cc[6];
  83. *(aa++) = *ad = sum;
  84. ad += Askip;
  85. cc += 8;
  86. }
  87. bb += 8;
  88. A += Askip + 1;
  89. C += 8;
  90. }
  91. }
  92. static void
  93. MultiplyAdd2_sym_p8p (dReal * A, dReal * B, dReal * C, int p, int Askip)
  94. {
  95. int i, j;
  96. dReal sum, *aa, *ad, *bb, *cc;
  97. dIASSERT (p > 0 && A && B && C);
  98. bb = B;
  99. for (i = 0; i < p; i++)
  100. {
  101. //aa is going accross the matrix, ad down
  102. aa = ad = A;
  103. cc = C;
  104. for (j = i; j < p; j++)
  105. {
  106. sum = bb[0] * cc[0];
  107. sum += bb[1] * cc[1];
  108. sum += bb[2] * cc[2];
  109. sum += bb[4] * cc[4];
  110. sum += bb[5] * cc[5];
  111. sum += bb[6] * cc[6];
  112. *(aa++) += sum;
  113. *ad += sum;
  114. ad += Askip;
  115. cc += 8;
  116. }
  117. bb += 8;
  118. A += Askip + 1;
  119. C += 8;
  120. }
  121. }
  122. // this assumes the 4th and 8th rows of B are zero.
  123. static void
  124. Multiply0_p81 (dReal * A, dReal * B, dReal * C, int p)
  125. {
  126. int i;
  127. dIASSERT (p > 0 && A && B && C);
  128. dReal sum;
  129. for (i = p; i; i--)
  130. {
  131. sum = B[0] * C[0];
  132. sum += B[1] * C[1];
  133. sum += B[2] * C[2];
  134. sum += B[4] * C[4];
  135. sum += B[5] * C[5];
  136. sum += B[6] * C[6];
  137. *(A++) = sum;
  138. B += 8;
  139. }
  140. }
  141. // this assumes the 4th and 8th rows of B are zero.
  142. static void
  143. MultiplyAdd0_p81 (dReal * A, dReal * B, dReal * C, int p)
  144. {
  145. int i;
  146. dIASSERT (p > 0 && A && B && C);
  147. dReal sum;
  148. for (i = p; i; i--)
  149. {
  150. sum = B[0] * C[0];
  151. sum += B[1] * C[1];
  152. sum += B[2] * C[2];
  153. sum += B[4] * C[4];
  154. sum += B[5] * C[5];
  155. sum += B[6] * C[6];
  156. *(A++) += sum;
  157. B += 8;
  158. }
  159. }
  160. // this assumes the 4th and 8th rows of B are zero.
  161. static void
  162. Multiply1_8q1 (dReal * A, dReal * B, dReal * C, int q)
  163. {
  164. int k;
  165. dReal sum;
  166. dIASSERT (q > 0 && A && B && C);
  167. sum = 0;
  168. for (k = 0; k < q; k++)
  169. sum += B[k * 8] * C[k];
  170. A[0] = sum;
  171. sum = 0;
  172. for (k = 0; k < q; k++)
  173. sum += B[1 + k * 8] * C[k];
  174. A[1] = sum;
  175. sum = 0;
  176. for (k = 0; k < q; k++)
  177. sum += B[2 + k * 8] * C[k];
  178. A[2] = sum;
  179. sum = 0;
  180. for (k = 0; k < q; k++)
  181. sum += B[4 + k * 8] * C[k];
  182. A[4] = sum;
  183. sum = 0;
  184. for (k = 0; k < q; k++)
  185. sum += B[5 + k * 8] * C[k];
  186. A[5] = sum;
  187. sum = 0;
  188. for (k = 0; k < q; k++)
  189. sum += B[6 + k * 8] * C[k];
  190. A[6] = sum;
  191. }
  192. //****************************************************************************
  193. // body rotation
  194. // return sin(x)/x. this has a singularity at 0 so special handling is needed
  195. // for small arguments.
  196. static inline dReal
  197. sinc (dReal x)
  198. {
  199. // if |x| < 1e-4 then use a taylor series expansion. this two term expansion
  200. // is actually accurate to one LS bit within this range if double precision
  201. // is being used - so don't worry!
  202. if (dFabs (x) < 1.0e-4)
  203. return REAL (1.0) - x * x * REAL (0.166666666666666666667);
  204. else
  205. return dSin (x) / x;
  206. }
  207. #if 0 // this is just dxStepBody()
  208. // given a body b, apply its linear and angular rotation over the time
  209. // interval h, thereby adjusting its position and orientation.
  210. static inline void
  211. moveAndRotateBody (dxBody * b, dReal h)
  212. {
  213. int j;
  214. // handle linear velocity
  215. for (j = 0; j < 3; j++)
  216. b->posr.pos[j] += h * b->lvel[j];
  217. if (b->flags & dxBodyFlagFiniteRotation)
  218. {
  219. dVector3 irv; // infitesimal rotation vector
  220. dQuaternion q; // quaternion for finite rotation
  221. if (b->flags & dxBodyFlagFiniteRotationAxis)
  222. {
  223. // split the angular velocity vector into a component along the finite
  224. // rotation axis, and a component orthogonal to it.
  225. dVector3 frv, irv; // finite rotation vector
  226. dReal k = dDOT (b->finite_rot_axis, b->avel);
  227. frv[0] = b->finite_rot_axis[0] * k;
  228. frv[1] = b->finite_rot_axis[1] * k;
  229. frv[2] = b->finite_rot_axis[2] * k;
  230. irv[0] = b->avel[0] - frv[0];
  231. irv[1] = b->avel[1] - frv[1];
  232. irv[2] = b->avel[2] - frv[2];
  233. // make a rotation quaternion q that corresponds to frv * h.
  234. // compare this with the full-finite-rotation case below.
  235. h *= REAL (0.5);
  236. dReal theta = k * h;
  237. q[0] = dCos (theta);
  238. dReal s = sinc (theta) * h;
  239. q[1] = frv[0] * s;
  240. q[2] = frv[1] * s;
  241. q[3] = frv[2] * s;
  242. }
  243. else
  244. {
  245. // make a rotation quaternion q that corresponds to w * h
  246. dReal wlen = dSqrt (b->avel[0] * b->avel[0] + b->avel[1] * b->avel[1] + b->avel[2] * b->avel[2]);
  247. h *= REAL (0.5);
  248. dReal theta = wlen * h;
  249. q[0] = dCos (theta);
  250. dReal s = sinc (theta) * h;
  251. q[1] = b->avel[0] * s;
  252. q[2] = b->avel[1] * s;
  253. q[3] = b->avel[2] * s;
  254. }
  255. // do the finite rotation
  256. dQuaternion q2;
  257. dQMultiply0 (q2, q, b->q);
  258. for (j = 0; j < 4; j++)
  259. b->q[j] = q2[j];
  260. // do the infitesimal rotation if required
  261. if (b->flags & dxBodyFlagFiniteRotationAxis)
  262. {
  263. dReal dq[4];
  264. dWtoDQ (irv, b->q, dq);
  265. for (j = 0; j < 4; j++)
  266. b->q[j] += h * dq[j];
  267. }
  268. }
  269. else
  270. {
  271. // the normal way - do an infitesimal rotation
  272. dReal dq[4];
  273. dWtoDQ (b->avel, b->q, dq);
  274. for (j = 0; j < 4; j++)
  275. b->q[j] += h * dq[j];
  276. }
  277. // normalize the quaternion and convert it to a rotation matrix
  278. dNormalize4 (b->q);
  279. dQtoR (b->q, b->posr.R);
  280. // notify all attached geoms that this body has moved
  281. for (dxGeom * geom = b->geom; geom; geom = dGeomGetBodyNext (geom))
  282. dGeomMoved (geom);
  283. }
  284. #endif
  285. //****************************************************************************
  286. //This is an implementation of the iterated/relaxation algorithm.
  287. //Here is a quick overview of the algorithm per Sergi Valverde's posts to the
  288. //mailing list:
  289. //
  290. // for i=0..N-1 do
  291. // for c = 0..C-1 do
  292. // Solve constraint c-th
  293. // Apply forces to constraint bodies
  294. // next
  295. // next
  296. // Integrate bodies
  297. void
  298. dInternalStepFast (dxWorld * world, dxBody * body[2], dReal * GI[2], dReal * GinvI[2], dxJoint * joint, dxJoint::Info1 info, dxJoint::Info2 Jinfo, dReal stepsize)
  299. {
  300. int i, j, k;
  301. # ifdef TIMING
  302. dTimerNow ("constraint preprocessing");
  303. # endif
  304. dReal stepsize1 = dRecip (stepsize);
  305. int m = info.m;
  306. // nothing to do if no constraints.
  307. if (m <= 0)
  308. return;
  309. int nub = 0;
  310. if (info.nub == info.m)
  311. nub = m;
  312. // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
  313. // format as J so we just go through the constraints in J multiplying by
  314. // the appropriate scalars and matrices.
  315. # ifdef TIMING
  316. dTimerNow ("compute A");
  317. # endif
  318. dReal JinvM[2 * 6 * 8];
  319. //dSetZero (JinvM, 2 * m * 8);
  320. dReal *Jsrc = Jinfo.J1l;
  321. dReal *Jdst = JinvM;
  322. if (body[0])
  323. {
  324. for (j = m - 1; j >= 0; j--)
  325. {
  326. for (k = 0; k < 3; k++)
  327. Jdst[k] = Jsrc[k] * body[0]->invMass;
  328. dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[0]);
  329. Jsrc += 8;
  330. Jdst += 8;
  331. }
  332. }
  333. if (body[1])
  334. {
  335. Jsrc = Jinfo.J2l;
  336. Jdst = JinvM + 8 * m;
  337. for (j = m - 1; j >= 0; j--)
  338. {
  339. for (k = 0; k < 3; k++)
  340. Jdst[k] = Jsrc[k] * body[1]->invMass;
  341. dMULTIPLY0_133 (Jdst + 4, Jsrc + 4, GinvI[1]);
  342. Jsrc += 8;
  343. Jdst += 8;
  344. }
  345. }
  346. // now compute A = JinvM * J'.
  347. int mskip = dPAD (m);
  348. dReal A[6 * 8];
  349. //dSetZero (A, 6 * 8);
  350. if (body[0]) {
  351. Multiply2_sym_p8p (A, JinvM, Jinfo.J1l, m, mskip);
  352. if (body[1])
  353. MultiplyAdd2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
  354. m, mskip);
  355. } else {
  356. if (body[1])
  357. Multiply2_sym_p8p (A, JinvM + 8 * m, Jinfo.J2l,
  358. m, mskip);
  359. }
  360. // add cfm to the diagonal of A
  361. for (i = 0; i < m; i++)
  362. A[i * mskip + i] += Jinfo.cfm[i] * stepsize1;
  363. // compute the right hand side `rhs'
  364. # ifdef TIMING
  365. dTimerNow ("compute rhs");
  366. # endif
  367. dReal tmp1[16];
  368. //dSetZero (tmp1, 16);
  369. // put v/h + invM*fe into tmp1
  370. for (i = 0; i < 2; i++)
  371. {
  372. if (!body[i])
  373. continue;
  374. for (j = 0; j < 3; j++)
  375. tmp1[i * 8 + j] = body[i]->facc[j] * body[i]->invMass + body[i]->lvel[j] * stepsize1;
  376. dMULTIPLY0_331 (tmp1 + i * 8 + 4, GinvI[i], body[i]->tacc);
  377. for (j = 0; j < 3; j++)
  378. tmp1[i * 8 + 4 + j] += body[i]->avel[j] * stepsize1;
  379. }
  380. // put J*tmp1 into rhs
  381. dReal rhs[6];
  382. //dSetZero (rhs, 6);
  383. if (body[0]) {
  384. Multiply0_p81 (rhs, Jinfo.J1l, tmp1, m);
  385. if (body[1])
  386. MultiplyAdd0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
  387. } else {
  388. if (body[1])
  389. Multiply0_p81 (rhs, Jinfo.J2l, tmp1 + 8, m);
  390. }
  391. // complete rhs
  392. for (i = 0; i < m; i++)
  393. rhs[i] = Jinfo.c[i] * stepsize1 - rhs[i];
  394. #ifdef SLOW_LCP
  395. // solve the LCP problem and get lambda.
  396. // this will destroy A but that's okay
  397. # ifdef TIMING
  398. dTimerNow ("solving LCP problem");
  399. # endif
  400. dReal *lambda = (dReal *) ALLOCA (m * sizeof (dReal));
  401. dReal *residual = (dReal *) ALLOCA (m * sizeof (dReal));
  402. dReal lo[6], hi[6];
  403. memcpy (lo, Jinfo.lo, m * sizeof (dReal));
  404. memcpy (hi, Jinfo.hi, m * sizeof (dReal));
  405. dSolveLCP (m, A, lambda, rhs, residual, nub, lo, hi, Jinfo.findex);
  406. #endif
  407. // LCP Solver replacement:
  408. // This algorithm goes like this:
  409. // Do a straightforward LDLT factorization of the matrix A, solving for
  410. // A*x = rhs
  411. // For each x[i] that is outside of the bounds of lo[i] and hi[i],
  412. // clamp x[i] into that range.
  413. // Substitute into A the now known x's
  414. // subtract the residual away from the rhs.
  415. // Remove row and column i from L, updating the factorization
  416. // place the known x's at the end of the array, keeping up with location in p
  417. // Repeat until all constraints have been clamped or all are within bounds
  418. //
  419. // This is probably only faster in the single joint case where only one repeat is
  420. // the norm.
  421. #ifdef FAST_FACTOR
  422. // factorize A (L*D*L'=A)
  423. # ifdef TIMING
  424. dTimerNow ("factorize A");
  425. # endif
  426. dReal d[6];
  427. dReal L[6 * 8];
  428. memcpy (L, A, m * mskip * sizeof (dReal));
  429. dFactorLDLT (L, d, m, mskip);
  430. // compute lambda
  431. # ifdef TIMING
  432. dTimerNow ("compute lambda");
  433. # endif
  434. int left = m; //constraints left to solve.
  435. int remove[6];
  436. dReal lambda[6];
  437. dReal x[6];
  438. int p[6];
  439. for (i = 0; i < 6; i++)
  440. p[i] = i;
  441. while (true)
  442. {
  443. memcpy (x, rhs, left * sizeof (dReal));
  444. dSolveLDLT (L, d, x, left, mskip);
  445. int fixed = 0;
  446. for (i = 0; i < left; i++)
  447. {
  448. j = p[i];
  449. remove[i] = false;
  450. // This isn't the exact same use of findex as dSolveLCP.... since x[findex]
  451. // may change after I've already clamped x[i], but it should be close
  452. if (Jinfo.findex[j] > -1)
  453. {
  454. dReal f = fabs (Jinfo.hi[j] * x[p[Jinfo.findex[j]]]);
  455. if (x[i] > f)
  456. x[i] = f;
  457. else if (x[i] < -f)
  458. x[i] = -f;
  459. else
  460. continue;
  461. }
  462. else
  463. {
  464. if (x[i] > Jinfo.hi[j])
  465. x[i] = Jinfo.hi[j];
  466. else if (x[i] < Jinfo.lo[j])
  467. x[i] = Jinfo.lo[j];
  468. else
  469. continue;
  470. }
  471. remove[i] = true;
  472. fixed++;
  473. }
  474. if (fixed == 0 || fixed == left) //no change or all constraints solved
  475. break;
  476. for (i = 0; i < left; i++) //sub in to right hand side.
  477. if (remove[i])
  478. for (j = 0; j < left; j++)
  479. if (!remove[j])
  480. rhs[j] -= A[j * mskip + i] * x[i];
  481. for (int r = left - 1; r >= 0; r--) //eliminate row/col for fixed variables
  482. {
  483. if (remove[r])
  484. {
  485. //dRemoveLDLT adapted for use without row pointers.
  486. if (r == left - 1)
  487. {
  488. left--;
  489. continue; // deleting last row/col is easy
  490. }
  491. else if (r == 0)
  492. {
  493. dReal a[6];
  494. for (i = 0; i < left; i++)
  495. a[i] = -A[i * mskip];
  496. a[0] += REAL (1.0);
  497. dLDLTAddTL (L, d, a, left, mskip);
  498. }
  499. else
  500. {
  501. dReal t[6];
  502. dReal a[6];
  503. for (i = 0; i < r; i++)
  504. t[i] = L[r * mskip + i] / d[i];
  505. for (i = 0; i < left - r; i++)
  506. a[i] = dDot (L + (r + i) * mskip, t, r) - A[(r + i) * mskip + r];
  507. a[0] += REAL (1.0);
  508. dLDLTAddTL (L + r * mskip + r, d + r, a, left - r, mskip);
  509. }
  510. dRemoveRowCol (L, left, mskip, r);
  511. //end dRemoveLDLT
  512. left--;
  513. if (r < (left - 1))
  514. {
  515. dReal tx = x[r];
  516. memmove (d + r, d + r + 1, (left - r) * sizeof (dReal));
  517. memmove (rhs + r, rhs + r + 1, (left - r) * sizeof (dReal));
  518. //x will get written over by rhs anyway, no need to move it around
  519. //just store the fixed value we just discovered in it.
  520. x[left] = tx;
  521. for (i = 0; i < m; i++)
  522. if (p[i] > r && p[i] <= left)
  523. p[i]--;
  524. p[r] = left;
  525. }
  526. }
  527. }
  528. }
  529. for (i = 0; i < m; i++)
  530. lambda[i] = x[p[i]];
  531. # endif
  532. // compute the constraint force `cforce'
  533. # ifdef TIMING
  534. dTimerNow ("compute constraint force");
  535. #endif
  536. // compute cforce = J'*lambda
  537. dJointFeedback *fb = joint->feedback;
  538. dReal cforce[16];
  539. //dSetZero (cforce, 16);
  540. if (fb)
  541. {
  542. // the user has requested feedback on the amount of force that this
  543. // joint is applying to the bodies. we use a slightly slower
  544. // computation that splits out the force components and puts them
  545. // in the feedback structure.
  546. dReal data1[8], data2[8];
  547. if (body[0])
  548. {
  549. Multiply1_8q1 (data1, Jinfo.J1l, lambda, m);
  550. dReal *cf1 = cforce;
  551. cf1[0] = (fb->f1[0] = data1[0]);
  552. cf1[1] = (fb->f1[1] = data1[1]);
  553. cf1[2] = (fb->f1[2] = data1[2]);
  554. cf1[4] = (fb->t1[0] = data1[4]);
  555. cf1[5] = (fb->t1[1] = data1[5]);
  556. cf1[6] = (fb->t1[2] = data1[6]);
  557. }
  558. if (body[1])
  559. {
  560. Multiply1_8q1 (data2, Jinfo.J2l, lambda, m);
  561. dReal *cf2 = cforce + 8;
  562. cf2[0] = (fb->f2[0] = data2[0]);
  563. cf2[1] = (fb->f2[1] = data2[1]);
  564. cf2[2] = (fb->f2[2] = data2[2]);
  565. cf2[4] = (fb->t2[0] = data2[4]);
  566. cf2[5] = (fb->t2[1] = data2[5]);
  567. cf2[6] = (fb->t2[2] = data2[6]);
  568. }
  569. }
  570. else
  571. {
  572. // no feedback is required, let's compute cforce the faster way
  573. if (body[0])
  574. Multiply1_8q1 (cforce, Jinfo.J1l, lambda, m);
  575. if (body[1])
  576. Multiply1_8q1 (cforce + 8, Jinfo.J2l, lambda, m);
  577. }
  578. for (i = 0; i < 2; i++)
  579. {
  580. if (!body[i])
  581. continue;
  582. for (j = 0; j < 3; j++)
  583. {
  584. body[i]->facc[j] += cforce[i * 8 + j];
  585. body[i]->tacc[j] += cforce[i * 8 + 4 + j];
  586. }
  587. }
  588. }
  589. void
  590. dInternalStepIslandFast (dxWorld * world, dxBody * const *bodies, int nb, dxJoint * const *_joints, int nj, dReal stepsize, int maxiterations)
  591. {
  592. # ifdef TIMING
  593. dTimerNow ("preprocessing");
  594. # endif
  595. dxBody *bodyPair[2], *body;
  596. dReal *GIPair[2], *GinvIPair[2];
  597. dxJoint *joint;
  598. int iter, b, j, i;
  599. dReal ministep = stepsize / maxiterations;
  600. // make a local copy of the joint array, because we might want to modify it.
  601. // (the "dxJoint *const*" declaration says we're allowed to modify the joints
  602. // but not the joint array, because the caller might need it unchanged).
  603. dxJoint **joints = (dxJoint **) ALLOCA (nj * sizeof (dxJoint *));
  604. memcpy (joints, _joints, nj * sizeof (dxJoint *));
  605. // get m = total constraint dimension, nub = number of unbounded variables.
  606. // create constraint offset array and number-of-rows array for all joints.
  607. // the constraints are re-ordered as follows: the purely unbounded
  608. // constraints, the mixed unbounded + LCP constraints, and last the purely
  609. // LCP constraints. this assists the LCP solver to put all unbounded
  610. // variables at the start for a quick factorization.
  611. //
  612. // joints with m=0 are inactive and are removed from the joints array
  613. // entirely, so that the code that follows does not consider them.
  614. // also number all active joints in the joint list (set their tag values).
  615. // inactive joints receive a tag value of -1.
  616. int m = 0;
  617. dxJoint::Info1 * info = (dxJoint::Info1 *) ALLOCA (nj * sizeof (dxJoint::Info1));
  618. int *ofs = (int *) ALLOCA (nj * sizeof (int));
  619. for (i = 0, j = 0; j < nj; j++)
  620. { // i=dest, j=src
  621. joints[j]->getInfo1 (info + i);
  622. dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
  623. if (info[i].m > 0)
  624. {
  625. joints[i] = joints[j];
  626. joints[i]->tag = i;
  627. i++;
  628. }
  629. else
  630. {
  631. joints[j]->tag = -1;
  632. }
  633. }
  634. nj = i;
  635. // the purely unbounded constraints
  636. for (i = 0; i < nj; i++)
  637. {
  638. ofs[i] = m;
  639. m += info[i].m;
  640. }
  641. dReal *c = NULL;
  642. dReal *cfm = NULL;
  643. dReal *lo = NULL;
  644. dReal *hi = NULL;
  645. int *findex = NULL;
  646. dReal *J = NULL;
  647. dxJoint::Info2 * Jinfo = NULL;
  648. if (m)
  649. {
  650. // create a constraint equation right hand side vector `c', a constraint
  651. // force mixing vector `cfm', and LCP low and high bound vectors, and an
  652. // 'findex' vector.
  653. c = (dReal *) ALLOCA (m * sizeof (dReal));
  654. cfm = (dReal *) ALLOCA (m * sizeof (dReal));
  655. lo = (dReal *) ALLOCA (m * sizeof (dReal));
  656. hi = (dReal *) ALLOCA (m * sizeof (dReal));
  657. findex = (int *) ALLOCA (m * sizeof (int));
  658. dSetZero (c, m);
  659. dSetValue (cfm, m, world->global_cfm);
  660. dSetValue (lo, m, -dInfinity);
  661. dSetValue (hi, m, dInfinity);
  662. for (i = 0; i < m; i++)
  663. findex[i] = -1;
  664. // get jacobian data from constraints. a (2*m)x8 matrix will be created
  665. // to store the two jacobian blocks from each constraint. it has this
  666. // format:
  667. //
  668. // l l l 0 a a a 0 \ .
  669. // l l l 0 a a a 0 }-- jacobian body 1 block for joint 0 (3 rows)
  670. // l l l 0 a a a 0 /
  671. // l l l 0 a a a 0 \ .
  672. // l l l 0 a a a 0 }-- jacobian body 2 block for joint 0 (3 rows)
  673. // l l l 0 a a a 0 /
  674. // l l l 0 a a a 0 }--- jacobian body 1 block for joint 1 (1 row)
  675. // l l l 0 a a a 0 }--- jacobian body 2 block for joint 1 (1 row)
  676. // etc...
  677. //
  678. // (lll) = linear jacobian data
  679. // (aaa) = angular jacobian data
  680. //
  681. # ifdef TIMING
  682. dTimerNow ("create J");
  683. # endif
  684. J = (dReal *) ALLOCA (2 * m * 8 * sizeof (dReal));
  685. dSetZero (J, 2 * m * 8);
  686. Jinfo = (dxJoint::Info2 *) ALLOCA (nj * sizeof (dxJoint::Info2));
  687. for (i = 0; i < nj; i++)
  688. {
  689. Jinfo[i].rowskip = 8;
  690. Jinfo[i].fps = dRecip (stepsize);
  691. Jinfo[i].erp = world->global_erp;
  692. Jinfo[i].J1l = J + 2 * 8 * ofs[i];
  693. Jinfo[i].J1a = Jinfo[i].J1l + 4;
  694. Jinfo[i].J2l = Jinfo[i].J1l + 8 * info[i].m;
  695. Jinfo[i].J2a = Jinfo[i].J2l + 4;
  696. Jinfo[i].c = c + ofs[i];
  697. Jinfo[i].cfm = cfm + ofs[i];
  698. Jinfo[i].lo = lo + ofs[i];
  699. Jinfo[i].hi = hi + ofs[i];
  700. Jinfo[i].findex = findex + ofs[i];
  701. //joints[i]->getInfo2 (Jinfo+i);
  702. }
  703. }
  704. dReal *saveFacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
  705. dReal *saveTacc = (dReal *) ALLOCA (nb * 4 * sizeof (dReal));
  706. dReal *globalI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
  707. dReal *globalInvI = (dReal *) ALLOCA (nb * 12 * sizeof (dReal));
  708. for (b = 0; b < nb; b++)
  709. {
  710. for (i = 0; i < 4; i++)
  711. {
  712. saveFacc[b * 4 + i] = bodies[b]->facc[i];
  713. saveTacc[b * 4 + i] = bodies[b]->tacc[i];
  714. }
  715. bodies[b]->tag = b;
  716. }
  717. for (iter = 0; iter < maxiterations; iter++)
  718. {
  719. # ifdef TIMING
  720. dTimerNow ("applying inertia and gravity");
  721. # endif
  722. dReal tmp[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  723. for (b = 0; b < nb; b++)
  724. {
  725. body = bodies[b];
  726. // for all bodies, compute the inertia tensor and its inverse in the global
  727. // frame, and compute the rotational force and add it to the torque
  728. // accumulator. I and invI are vertically stacked 3x4 matrices, one per body.
  729. // @@@ check computation of rotational force.
  730. // compute inertia tensor in global frame
  731. dMULTIPLY2_333 (tmp, body->mass.I, body->posr.R);
  732. dMULTIPLY0_333 (globalI + b * 12, body->posr.R, tmp);
  733. // compute inverse inertia tensor in global frame
  734. dMULTIPLY2_333 (tmp, body->invI, body->posr.R);
  735. dMULTIPLY0_333 (globalInvI + b * 12, body->posr.R, tmp);
  736. for (i = 0; i < 4; i++)
  737. body->tacc[i] = saveTacc[b * 4 + i];
  738. if (body->flags & dxBodyGyroscopic) {
  739. // DanielKO: this doesn't look right/efficient, but anyways...
  740. // compute rotational force
  741. dMULTIPLY0_331 (tmp, globalI + b * 12, body->avel);
  742. dCROSS (body->tacc, -=, body->avel, tmp);
  743. }
  744. // add the gravity force to all bodies
  745. if ((body->flags & dxBodyNoGravity) == 0)
  746. {
  747. body->facc[0] = saveFacc[b * 4 + 0] + body->mass.mass * world->gravity[0];
  748. body->facc[1] = saveFacc[b * 4 + 1] + body->mass.mass * world->gravity[1];
  749. body->facc[2] = saveFacc[b * 4 + 2] + body->mass.mass * world->gravity[2];
  750. body->facc[3] = 0;
  751. } else {
  752. body->facc[0] = saveFacc[b * 4 + 0];
  753. body->facc[1] = saveFacc[b * 4 + 1];
  754. body->facc[2] = saveFacc[b * 4 + 2];
  755. body->facc[3] = 0;
  756. }
  757. }
  758. #ifdef RANDOM_JOINT_ORDER
  759. #ifdef TIMING
  760. dTimerNow ("randomizing joint order");
  761. #endif
  762. //randomize the order of the joints by looping through the array
  763. //and swapping the current joint pointer with a random one before it.
  764. for (j = 0; j < nj; j++)
  765. {
  766. joint = joints[j];
  767. dxJoint::Info1 i1 = info[j];
  768. dxJoint::Info2 i2 = Jinfo[j];
  769. const int r = dRandInt(j+1);
  770. dIASSERT (r < nj);
  771. joints[j] = joints[r];
  772. info[j] = info[r];
  773. Jinfo[j] = Jinfo[r];
  774. joints[r] = joint;
  775. info[r] = i1;
  776. Jinfo[r] = i2;
  777. }
  778. #endif
  779. //now iterate through the random ordered joint array we created.
  780. for (j = 0; j < nj; j++)
  781. {
  782. #ifdef TIMING
  783. dTimerNow ("setting up joint");
  784. #endif
  785. joint = joints[j];
  786. bodyPair[0] = joint->node[0].body;
  787. bodyPair[1] = joint->node[1].body;
  788. if (bodyPair[0] && (bodyPair[0]->flags & dxBodyDisabled))
  789. bodyPair[0] = 0;
  790. if (bodyPair[1] && (bodyPair[1]->flags & dxBodyDisabled))
  791. bodyPair[1] = 0;
  792. //if this joint is not connected to any enabled bodies, skip it.
  793. if (!bodyPair[0] && !bodyPair[1])
  794. continue;
  795. if (bodyPair[0])
  796. {
  797. GIPair[0] = globalI + bodyPair[0]->tag * 12;
  798. GinvIPair[0] = globalInvI + bodyPair[0]->tag * 12;
  799. }
  800. if (bodyPair[1])
  801. {
  802. GIPair[1] = globalI + bodyPair[1]->tag * 12;
  803. GinvIPair[1] = globalInvI + bodyPair[1]->tag * 12;
  804. }
  805. joints[j]->getInfo2 (Jinfo + j);
  806. //dInternalStepIslandFast is an exact copy of the old routine with one
  807. //modification: the calculated forces are added back to the facc and tacc
  808. //vectors instead of applying them to the bodies and moving them.
  809. if (info[j].m > 0)
  810. {
  811. dInternalStepFast (world, bodyPair, GIPair, GinvIPair, joint, info[j], Jinfo[j], ministep);
  812. }
  813. }
  814. // }
  815. # ifdef TIMING
  816. dTimerNow ("moving bodies");
  817. # endif
  818. //Now we can simulate all the free floating bodies, and move them.
  819. for (b = 0; b < nb; b++)
  820. {
  821. body = bodies[b];
  822. for (i = 0; i < 4; i++)
  823. {
  824. body->facc[i] *= ministep;
  825. body->tacc[i] *= ministep;
  826. }
  827. //apply torque
  828. dMULTIPLYADD0_331 (body->avel, globalInvI + b * 12, body->tacc);
  829. //apply force
  830. for (i = 0; i < 3; i++)
  831. body->lvel[i] += body->invMass * body->facc[i];
  832. //move It!
  833. dxStepBody (body, ministep);
  834. }
  835. }
  836. for (b = 0; b < nb; b++)
  837. for (j = 0; j < 4; j++)
  838. bodies[b]->facc[j] = bodies[b]->tacc[j] = 0;
  839. }
  840. #ifdef NO_ISLANDS
  841. // Since the iterative algorithm doesn't care about islands of bodies, this is a
  842. // faster algorithm that just sends it all the joints and bodies in one array.
  843. // It's downfall is it's inability to handle disabled bodies as well as the old one.
  844. static void
  845. processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
  846. {
  847. // nothing to do if no bodies
  848. if (world->nb <= 0)
  849. return;
  850. dInternalHandleAutoDisabling (world,stepsize);
  851. # ifdef TIMING
  852. dTimerStart ("creating joint and body arrays");
  853. # endif
  854. dxBody **bodies, *body;
  855. dxJoint **joints, *joint;
  856. joints = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
  857. bodies = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
  858. int nj = 0;
  859. for (joint = world->firstjoint; joint; joint = (dxJoint *) joint->next)
  860. joints[nj++] = joint;
  861. int nb = 0;
  862. for (body = world->firstbody; body; body = (dxBody *) body->next)
  863. bodies[nb++] = body;
  864. dInternalStepIslandFast (world, bodies, nb, joints, nj, stepsize, maxiterations);
  865. # ifdef TIMING
  866. dTimerEnd ();
  867. dTimerReport (stdout, 1);
  868. # endif
  869. }
  870. #else
  871. //****************************************************************************
  872. // island processing
  873. // this groups all joints and bodies in a world into islands. all objects
  874. // in an island are reachable by going through connected bodies and joints.
  875. // each island can be simulated separately.
  876. // note that joints that are not attached to anything will not be included
  877. // in any island, an so they do not affect the simulation.
  878. //
  879. // this function starts new island from unvisited bodies. however, it will
  880. // never start a new islands from a disabled body. thus islands of disabled
  881. // bodies will not be included in the simulation. disabled bodies are
  882. // re-enabled if they are found to be part of an active island.
  883. static void
  884. processIslandsFast (dxWorld * world, dReal stepsize, int maxiterations)
  885. {
  886. #ifdef TIMING
  887. dTimerStart ("Island Setup");
  888. #endif
  889. dxBody *b, *bb, **body;
  890. dxJoint *j, **joint;
  891. // nothing to do if no bodies
  892. if (world->nb <= 0)
  893. return;
  894. dInternalHandleAutoDisabling (world,stepsize);
  895. // make arrays for body and joint lists (for a single island) to go into
  896. body = (dxBody **) ALLOCA (world->nb * sizeof (dxBody *));
  897. joint = (dxJoint **) ALLOCA (world->nj * sizeof (dxJoint *));
  898. int bcount = 0; // number of bodies in `body'
  899. int jcount = 0; // number of joints in `joint'
  900. int tbcount = 0;
  901. int tjcount = 0;
  902. // set all body/joint tags to 0
  903. for (b = world->firstbody; b; b = (dxBody *) b->next)
  904. b->tag = 0;
  905. for (j = world->firstjoint; j; j = (dxJoint *) j->next)
  906. j->tag = 0;
  907. // allocate a stack of unvisited bodies in the island. the maximum size of
  908. // the stack can be the lesser of the number of bodies or joints, because
  909. // new bodies are only ever added to the stack by going through untagged
  910. // joints. all the bodies in the stack must be tagged!
  911. int stackalloc = (world->nj < world->nb) ? world->nj : world->nb;
  912. dxBody **stack = (dxBody **) ALLOCA (stackalloc * sizeof (dxBody *));
  913. int *autostack = (int *) ALLOCA (stackalloc * sizeof (int));
  914. for (bb = world->firstbody; bb; bb = (dxBody *) bb->next)
  915. {
  916. #ifdef TIMING
  917. dTimerNow ("Island Processing");
  918. #endif
  919. // get bb = the next enabled, untagged body, and tag it
  920. if (bb->tag || (bb->flags & dxBodyDisabled) || (bb->invMass == 0))
  921. continue;
  922. bb->tag = 1;
  923. // tag all bodies and joints starting from bb.
  924. int stacksize = 0;
  925. int autoDepth = autoEnableDepth;
  926. b = bb;
  927. body[0] = bb;
  928. bcount = 1;
  929. jcount = 0;
  930. goto quickstart;
  931. while (stacksize > 0)
  932. {
  933. b = stack[--stacksize]; // pop body off stack
  934. autoDepth = autostack[stacksize];
  935. body[bcount++] = b; // put body on body list
  936. quickstart:
  937. // traverse and tag all body's joints, add untagged connected bodies
  938. // to stack
  939. for (dxJointNode * n = b->firstjoint; n; n = n->next)
  940. {
  941. if (!n->joint->tag)
  942. {
  943. int thisDepth = autoEnableDepth;
  944. n->joint->tag = 1;
  945. joint[jcount++] = n->joint;
  946. if (n->body && !n->body->tag)
  947. {
  948. if (n->body->flags & dxBodyDisabled)
  949. thisDepth = autoDepth - 1;
  950. if (thisDepth < 0)
  951. continue;
  952. n->body->flags &= ~dxBodyDisabled;
  953. n->body->tag = 1;
  954. autostack[stacksize] = thisDepth;
  955. stack[stacksize++] = n->body;
  956. }
  957. }
  958. }
  959. dIASSERT (stacksize <= world->nb);
  960. dIASSERT (stacksize <= world->nj);
  961. }
  962. // now do something with body and joint lists
  963. dInternalStepIslandFast (world, body, bcount, joint, jcount, stepsize, maxiterations);
  964. // what we've just done may have altered the body/joint tag values.
  965. // we must make sure that these tags are nonzero.
  966. // also make sure all bodies are in the enabled state.
  967. int i;
  968. for (i = 0; i < bcount; i++)
  969. {
  970. body[i]->tag = 1;
  971. body[i]->flags &= ~dxBodyDisabled;
  972. }
  973. for (i = 0; i < jcount; i++)
  974. joint[i]->tag = 1;
  975. tbcount += bcount;
  976. tjcount += jcount;
  977. }
  978. #ifdef TIMING
  979. dMessage(0, "Total joints processed: %i, bodies: %i", tjcount, tbcount);
  980. #endif
  981. // if debugging, check that all objects (except for disabled bodies,
  982. // unconnected joints, and joints that are connected to disabled bodies)
  983. // were tagged.
  984. # ifndef dNODEBUG
  985. for (b = world->firstbody; b; b = (dxBody *) b->next)
  986. {
  987. if (b->flags & dxBodyDisabled)
  988. {
  989. if (b->tag)
  990. dDebug (0, "disabled body tagged");
  991. }
  992. else
  993. {
  994. if (!b->tag)
  995. dDebug (0, "enabled body not tagged");
  996. }
  997. }
  998. for (j = world->firstjoint; j; j = (dxJoint *) j->next)
  999. {
  1000. if ((j->node[0].body && (j->node[0].body->flags & dxBodyDisabled) == 0) || (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled) == 0))
  1001. {
  1002. if (!j->tag)
  1003. dDebug (0, "attached enabled joint not tagged");
  1004. }
  1005. else
  1006. {
  1007. if (j->tag)
  1008. dDebug (0, "unattached or disabled joint tagged");
  1009. }
  1010. }
  1011. # endif
  1012. # ifdef TIMING
  1013. dTimerEnd ();
  1014. dTimerReport (stdout, 1);
  1015. # endif
  1016. }
  1017. #endif
  1018. void dWorldStepFast1 (dWorldID w, dReal stepsize, int maxiterations)
  1019. {
  1020. dUASSERT (w, "bad world argument");
  1021. dUASSERT (stepsize > 0, "stepsize must be > 0");
  1022. processIslandsFast (w, stepsize, maxiterations);
  1023. }