Bullet Collision Detection & Physics Library
btDantzigLCP.cpp
Go to the documentation of this file.
1/*************************************************************************
2* *
3* Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4* All rights reserved. Email: russ@q12.org Web: www.q12.org *
5* *
6* This library is free software; you can redistribute it and/or *
7* modify it under the terms of EITHER: *
8* (1) The GNU Lesser General Public License as published by the Free *
9* Software Foundation; either version 2.1 of the License, or (at *
10* your option) any later version. The text of the GNU Lesser *
11* General Public License is included with this library in the *
12* file LICENSE.TXT. *
13* (2) The BSD-style license that is included with this library in *
14* the file LICENSE-BSD.TXT. *
15* *
16* This library is distributed in the hope that it will be useful, *
17* but WITHOUT ANY WARRANTY; without even the implied warranty of *
18* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19* LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20* *
21*************************************************************************/
22
23/*
24
25
26THE ALGORITHM
27-------------
28
29solve A*x = b+w, with x and w subject to certain LCP conditions.
30each x(i),w(i) must lie on one of the three line segments in the following
31diagram. each line segment corresponds to one index set :
32
33 w(i)
34 /|\ | :
35 | | :
36 | |i in N :
37 w>0 | |state[i]=0 :
38 | | :
39 | | : i in C
40 w=0 + +-----------------------+
41 | : |
42 | : |
43 w<0 | : |i in N
44 | : |state[i]=1
45 | : |
46 | : |
47 +-------|-----------|-----------|----------> x(i)
48 lo 0 hi
49
50the Dantzig algorithm proceeds as follows:
51 for i=1:n
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
57 it hits.
58
59we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60simpler, because the starting point for x(i),w(i) is always on the dotted
61line x=0 and x will only ever increase in one direction, so it can only hit
62two out of the three line segments.
63
64
65NOTES
66-----
67
68this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69the implementation is split into an LCP problem object (btLCP) and an LCP
70driver function. most optimization occurs in the btLCP object.
71
72a naive implementation of the algorithm requires either a lot of data motion
73or a lot of permutation-array lookup, because we are constantly re-ordering
74rows and columns. to avoid this and make a more optimized algorithm, a
75non-trivial data structure is used to represent the matrix A (this is
76implemented in the fast version of the btLCP object).
77
78during execution of this algorithm, some indexes in A are clamped (set C),
79some are non-clamped (set N), and some are "don't care" (where x=0).
80A,x,b,w (and other problem vectors) are permuted such that the clamped
81indexes are first, the unclamped indexes are next, and the don't-care
82indexes are last. this permutation is recorded in the array `p'.
83initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84the corresponding elements of p are swapped.
85
86because the C and N elements are grouped together in the rows of A, we can do
87lots of work with a fast dot product function. if A,x,etc were not permuted
88and we only had a permutation array, then those dot products would be much
89slower as we would have a permutation array lookup in some inner loops.
90
91A is accessed through an array of row pointers, so that element (i,j) of the
92permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93we still have to actually move the data.
94
95during execution of this algorithm we maintain an L*D*L' factorization of
96the clamped submatrix of A (call it `AC') which is the top left nC*nC
97submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99(1) AC is always permuted such that L*D*L' = AC. this causes a problem
100when a row/column is removed from C, because then all the rows/columns of A
101between the deleted index and the end of C need to be rotated downward.
102this results in a lot of data motion and slows things down.
103(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104itself a permutation of the underlying A). this is what we do - the
105permutation is recorded in the vector C. call this permutation A[C,C].
106when a row/column is removed from C, all we have to do is swap two
107rows/columns and manipulate C.
108
109*/
110
111#include "btDantzigLCP.h"
112
113#include <string.h> //memcpy
114
115bool s_error = false;
116
117//***************************************************************************
118// code generation parameters
119
120#define btLCP_FAST // use fast btLCP object
121
122// option 1 : matrix row pointers (less data copying)
123#define BTROWPTRS
124#define BTATYPE btScalar **
125#define BTAROW(i) (m_A[i])
126
127// option 2 : no matrix row pointers (slightly faster inner loops)
128//#define NOROWPTRS
129//#define BTATYPE btScalar *
130//#define BTAROW(i) (m_A+(i)*m_nskip)
131
132#define BTNUB_OPTIMIZATIONS
133
134/* solve L*X=B, with B containing 1 right hand sides.
135 * L is an n*n lower triangular matrix with ones on the diagonal.
136 * L is stored by rows and its leading dimension is lskip.
137 * B is an n*1 matrix that contains the right hand sides.
138 * B is stored by columns and its leading dimension is also lskip.
139 * B is overwritten with X.
140 * this processes blocks of 2*2.
141 * if this is in the factorizer source file, n must be a multiple of 2.
142 */
143
144static void btSolveL1_1(const btScalar *L, btScalar *B, int n, int lskip1)
145{
146 /* declare variables - Z matrix, p and q vectors, etc */
147 btScalar Z11, m11, Z21, m21, p1, q1, p2, *ex;
148 const btScalar *ell;
149 int i, j;
150 /* compute all 2 x 1 blocks of X */
151 for (i = 0; i < n; i += 2)
152 {
153 /* compute all 2 x 1 block of X, from rows i..i+2-1 */
154 /* set the Z matrix to 0 */
155 Z11 = 0;
156 Z21 = 0;
157 ell = L + i * lskip1;
158 ex = B;
159 /* the inner loop that computes outer products and adds them to Z */
160 for (j = i - 2; j >= 0; j -= 2)
161 {
162 /* compute outer product and add it to the Z matrix */
163 p1 = ell[0];
164 q1 = ex[0];
165 m11 = p1 * q1;
166 p2 = ell[lskip1];
167 m21 = p2 * q1;
168 Z11 += m11;
169 Z21 += m21;
170 /* compute outer product and add it to the Z matrix */
171 p1 = ell[1];
172 q1 = ex[1];
173 m11 = p1 * q1;
174 p2 = ell[1 + lskip1];
175 m21 = p2 * q1;
176 /* advance pointers */
177 ell += 2;
178 ex += 2;
179 Z11 += m11;
180 Z21 += m21;
181 /* end of inner loop */
182 }
183 /* compute left-over iterations */
184 j += 2;
185 for (; j > 0; j--)
186 {
187 /* compute outer product and add it to the Z matrix */
188 p1 = ell[0];
189 q1 = ex[0];
190 m11 = p1 * q1;
191 p2 = ell[lskip1];
192 m21 = p2 * q1;
193 /* advance pointers */
194 ell += 1;
195 ex += 1;
196 Z11 += m11;
197 Z21 += m21;
198 }
199 /* finish computing the X(i) block */
200 Z11 = ex[0] - Z11;
201 ex[0] = Z11;
202 p1 = ell[lskip1];
203 Z21 = ex[1] - Z21 - p1 * Z11;
204 ex[1] = Z21;
205 /* end of outer loop */
206 }
207}
208
209/* solve L*X=B, with B containing 2 right hand sides.
210 * L is an n*n lower triangular matrix with ones on the diagonal.
211 * L is stored by rows and its leading dimension is lskip.
212 * B is an n*2 matrix that contains the right hand sides.
213 * B is stored by columns and its leading dimension is also lskip.
214 * B is overwritten with X.
215 * this processes blocks of 2*2.
216 * if this is in the factorizer source file, n must be a multiple of 2.
217 */
218
219static void btSolveL1_2(const btScalar *L, btScalar *B, int n, int lskip1)
220{
221 /* declare variables - Z matrix, p and q vectors, etc */
222 btScalar Z11, m11, Z12, m12, Z21, m21, Z22, m22, p1, q1, p2, q2, *ex;
223 const btScalar *ell;
224 int i, j;
225 /* compute all 2 x 2 blocks of X */
226 for (i = 0; i < n; i += 2)
227 {
228 /* compute all 2 x 2 block of X, from rows i..i+2-1 */
229 /* set the Z matrix to 0 */
230 Z11 = 0;
231 Z12 = 0;
232 Z21 = 0;
233 Z22 = 0;
234 ell = L + i * lskip1;
235 ex = B;
236 /* the inner loop that computes outer products and adds them to Z */
237 for (j = i - 2; j >= 0; j -= 2)
238 {
239 /* compute outer product and add it to the Z matrix */
240 p1 = ell[0];
241 q1 = ex[0];
242 m11 = p1 * q1;
243 q2 = ex[lskip1];
244 m12 = p1 * q2;
245 p2 = ell[lskip1];
246 m21 = p2 * q1;
247 m22 = p2 * q2;
248 Z11 += m11;
249 Z12 += m12;
250 Z21 += m21;
251 Z22 += m22;
252 /* compute outer product and add it to the Z matrix */
253 p1 = ell[1];
254 q1 = ex[1];
255 m11 = p1 * q1;
256 q2 = ex[1 + lskip1];
257 m12 = p1 * q2;
258 p2 = ell[1 + lskip1];
259 m21 = p2 * q1;
260 m22 = p2 * q2;
261 /* advance pointers */
262 ell += 2;
263 ex += 2;
264 Z11 += m11;
265 Z12 += m12;
266 Z21 += m21;
267 Z22 += m22;
268 /* end of inner loop */
269 }
270 /* compute left-over iterations */
271 j += 2;
272 for (; j > 0; j--)
273 {
274 /* compute outer product and add it to the Z matrix */
275 p1 = ell[0];
276 q1 = ex[0];
277 m11 = p1 * q1;
278 q2 = ex[lskip1];
279 m12 = p1 * q2;
280 p2 = ell[lskip1];
281 m21 = p2 * q1;
282 m22 = p2 * q2;
283 /* advance pointers */
284 ell += 1;
285 ex += 1;
286 Z11 += m11;
287 Z12 += m12;
288 Z21 += m21;
289 Z22 += m22;
290 }
291 /* finish computing the X(i) block */
292 Z11 = ex[0] - Z11;
293 ex[0] = Z11;
294 Z12 = ex[lskip1] - Z12;
295 ex[lskip1] = Z12;
296 p1 = ell[lskip1];
297 Z21 = ex[1] - Z21 - p1 * Z11;
298 ex[1] = Z21;
299 Z22 = ex[1 + lskip1] - Z22 - p1 * Z12;
300 ex[1 + lskip1] = Z22;
301 /* end of outer loop */
302 }
303}
304
305void btFactorLDLT(btScalar *A, btScalar *d, int n, int nskip1)
306{
307 int i, j;
308 btScalar sum, *ell, *dee, dd, p1, p2, q1, q2, Z11, m11, Z21, m21, Z22, m22;
309 if (n < 1) return;
310
311 for (i = 0; i <= n - 2; i += 2)
312 {
313 /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
314 btSolveL1_2(A, A + i * nskip1, i, nskip1);
315 /* scale the elements in a 2 x i block at A(i,0), and also */
316 /* compute Z = the outer product matrix that we'll need. */
317 Z11 = 0;
318 Z21 = 0;
319 Z22 = 0;
320 ell = A + i * nskip1;
321 dee = d;
322 for (j = i - 6; j >= 0; j -= 6)
323 {
324 p1 = ell[0];
325 p2 = ell[nskip1];
326 dd = dee[0];
327 q1 = p1 * dd;
328 q2 = p2 * dd;
329 ell[0] = q1;
330 ell[nskip1] = q2;
331 m11 = p1 * q1;
332 m21 = p2 * q1;
333 m22 = p2 * q2;
334 Z11 += m11;
335 Z21 += m21;
336 Z22 += m22;
337 p1 = ell[1];
338 p2 = ell[1 + nskip1];
339 dd = dee[1];
340 q1 = p1 * dd;
341 q2 = p2 * dd;
342 ell[1] = q1;
343 ell[1 + nskip1] = q2;
344 m11 = p1 * q1;
345 m21 = p2 * q1;
346 m22 = p2 * q2;
347 Z11 += m11;
348 Z21 += m21;
349 Z22 += m22;
350 p1 = ell[2];
351 p2 = ell[2 + nskip1];
352 dd = dee[2];
353 q1 = p1 * dd;
354 q2 = p2 * dd;
355 ell[2] = q1;
356 ell[2 + nskip1] = q2;
357 m11 = p1 * q1;
358 m21 = p2 * q1;
359 m22 = p2 * q2;
360 Z11 += m11;
361 Z21 += m21;
362 Z22 += m22;
363 p1 = ell[3];
364 p2 = ell[3 + nskip1];
365 dd = dee[3];
366 q1 = p1 * dd;
367 q2 = p2 * dd;
368 ell[3] = q1;
369 ell[3 + nskip1] = q2;
370 m11 = p1 * q1;
371 m21 = p2 * q1;
372 m22 = p2 * q2;
373 Z11 += m11;
374 Z21 += m21;
375 Z22 += m22;
376 p1 = ell[4];
377 p2 = ell[4 + nskip1];
378 dd = dee[4];
379 q1 = p1 * dd;
380 q2 = p2 * dd;
381 ell[4] = q1;
382 ell[4 + nskip1] = q2;
383 m11 = p1 * q1;
384 m21 = p2 * q1;
385 m22 = p2 * q2;
386 Z11 += m11;
387 Z21 += m21;
388 Z22 += m22;
389 p1 = ell[5];
390 p2 = ell[5 + nskip1];
391 dd = dee[5];
392 q1 = p1 * dd;
393 q2 = p2 * dd;
394 ell[5] = q1;
395 ell[5 + nskip1] = q2;
396 m11 = p1 * q1;
397 m21 = p2 * q1;
398 m22 = p2 * q2;
399 Z11 += m11;
400 Z21 += m21;
401 Z22 += m22;
402 ell += 6;
403 dee += 6;
404 }
405 /* compute left-over iterations */
406 j += 6;
407 for (; j > 0; j--)
408 {
409 p1 = ell[0];
410 p2 = ell[nskip1];
411 dd = dee[0];
412 q1 = p1 * dd;
413 q2 = p2 * dd;
414 ell[0] = q1;
415 ell[nskip1] = q2;
416 m11 = p1 * q1;
417 m21 = p2 * q1;
418 m22 = p2 * q2;
419 Z11 += m11;
420 Z21 += m21;
421 Z22 += m22;
422 ell++;
423 dee++;
424 }
425 /* solve for diagonal 2 x 2 block at A(i,i) */
426 Z11 = ell[0] - Z11;
427 Z21 = ell[nskip1] - Z21;
428 Z22 = ell[1 + nskip1] - Z22;
429 dee = d + i;
430 /* factorize 2 x 2 block Z,dee */
431 /* factorize row 1 */
432 dee[0] = btRecip(Z11);
433 /* factorize row 2 */
434 sum = 0;
435 q1 = Z21;
436 q2 = q1 * dee[0];
437 Z21 = q2;
438 sum += q1 * q2;
439 dee[1] = btRecip(Z22 - sum);
440 /* done factorizing 2 x 2 block */
441 ell[nskip1] = Z21;
442 }
443 /* compute the (less than 2) rows at the bottom */
444 switch (n - i)
445 {
446 case 0:
447 break;
448
449 case 1:
450 btSolveL1_1(A, A + i * nskip1, i, nskip1);
451 /* scale the elements in a 1 x i block at A(i,0), and also */
452 /* compute Z = the outer product matrix that we'll need. */
453 Z11 = 0;
454 ell = A + i * nskip1;
455 dee = d;
456 for (j = i - 6; j >= 0; j -= 6)
457 {
458 p1 = ell[0];
459 dd = dee[0];
460 q1 = p1 * dd;
461 ell[0] = q1;
462 m11 = p1 * q1;
463 Z11 += m11;
464 p1 = ell[1];
465 dd = dee[1];
466 q1 = p1 * dd;
467 ell[1] = q1;
468 m11 = p1 * q1;
469 Z11 += m11;
470 p1 = ell[2];
471 dd = dee[2];
472 q1 = p1 * dd;
473 ell[2] = q1;
474 m11 = p1 * q1;
475 Z11 += m11;
476 p1 = ell[3];
477 dd = dee[3];
478 q1 = p1 * dd;
479 ell[3] = q1;
480 m11 = p1 * q1;
481 Z11 += m11;
482 p1 = ell[4];
483 dd = dee[4];
484 q1 = p1 * dd;
485 ell[4] = q1;
486 m11 = p1 * q1;
487 Z11 += m11;
488 p1 = ell[5];
489 dd = dee[5];
490 q1 = p1 * dd;
491 ell[5] = q1;
492 m11 = p1 * q1;
493 Z11 += m11;
494 ell += 6;
495 dee += 6;
496 }
497 /* compute left-over iterations */
498 j += 6;
499 for (; j > 0; j--)
500 {
501 p1 = ell[0];
502 dd = dee[0];
503 q1 = p1 * dd;
504 ell[0] = q1;
505 m11 = p1 * q1;
506 Z11 += m11;
507 ell++;
508 dee++;
509 }
510 /* solve for diagonal 1 x 1 block at A(i,i) */
511 Z11 = ell[0] - Z11;
512 dee = d + i;
513 /* factorize 1 x 1 block Z,dee */
514 /* factorize row 1 */
515 dee[0] = btRecip(Z11);
516 /* done factorizing 1 x 1 block */
517 break;
518
519 //default: *((char*)0)=0; /* this should never happen! */
520 }
521}
522
523/* solve L*X=B, with B containing 1 right hand sides.
524 * L is an n*n lower triangular matrix with ones on the diagonal.
525 * L is stored by rows and its leading dimension is lskip.
526 * B is an n*1 matrix that contains the right hand sides.
527 * B is stored by columns and its leading dimension is also lskip.
528 * B is overwritten with X.
529 * this processes blocks of 4*4.
530 * if this is in the factorizer source file, n must be a multiple of 4.
531 */
532
533void btSolveL1(const btScalar *L, btScalar *B, int n, int lskip1)
534{
535 /* declare variables - Z matrix, p and q vectors, etc */
536 btScalar Z11, Z21, Z31, Z41, p1, q1, p2, p3, p4, *ex;
537 const btScalar *ell;
538 int lskip2, lskip3, i, j;
539 /* compute lskip values */
540 lskip2 = 2 * lskip1;
541 lskip3 = 3 * lskip1;
542 /* compute all 4 x 1 blocks of X */
543 for (i = 0; i <= n - 4; i += 4)
544 {
545 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
546 /* set the Z matrix to 0 */
547 Z11 = 0;
548 Z21 = 0;
549 Z31 = 0;
550 Z41 = 0;
551 ell = L + i * lskip1;
552 ex = B;
553 /* the inner loop that computes outer products and adds them to Z */
554 for (j = i - 12; j >= 0; j -= 12)
555 {
556 /* load p and q values */
557 p1 = ell[0];
558 q1 = ex[0];
559 p2 = ell[lskip1];
560 p3 = ell[lskip2];
561 p4 = ell[lskip3];
562 /* compute outer product and add it to the Z matrix */
563 Z11 += p1 * q1;
564 Z21 += p2 * q1;
565 Z31 += p3 * q1;
566 Z41 += p4 * q1;
567 /* load p and q values */
568 p1 = ell[1];
569 q1 = ex[1];
570 p2 = ell[1 + lskip1];
571 p3 = ell[1 + lskip2];
572 p4 = ell[1 + lskip3];
573 /* compute outer product and add it to the Z matrix */
574 Z11 += p1 * q1;
575 Z21 += p2 * q1;
576 Z31 += p3 * q1;
577 Z41 += p4 * q1;
578 /* load p and q values */
579 p1 = ell[2];
580 q1 = ex[2];
581 p2 = ell[2 + lskip1];
582 p3 = ell[2 + lskip2];
583 p4 = ell[2 + lskip3];
584 /* compute outer product and add it to the Z matrix */
585 Z11 += p1 * q1;
586 Z21 += p2 * q1;
587 Z31 += p3 * q1;
588 Z41 += p4 * q1;
589 /* load p and q values */
590 p1 = ell[3];
591 q1 = ex[3];
592 p2 = ell[3 + lskip1];
593 p3 = ell[3 + lskip2];
594 p4 = ell[3 + lskip3];
595 /* compute outer product and add it to the Z matrix */
596 Z11 += p1 * q1;
597 Z21 += p2 * q1;
598 Z31 += p3 * q1;
599 Z41 += p4 * q1;
600 /* load p and q values */
601 p1 = ell[4];
602 q1 = ex[4];
603 p2 = ell[4 + lskip1];
604 p3 = ell[4 + lskip2];
605 p4 = ell[4 + lskip3];
606 /* compute outer product and add it to the Z matrix */
607 Z11 += p1 * q1;
608 Z21 += p2 * q1;
609 Z31 += p3 * q1;
610 Z41 += p4 * q1;
611 /* load p and q values */
612 p1 = ell[5];
613 q1 = ex[5];
614 p2 = ell[5 + lskip1];
615 p3 = ell[5 + lskip2];
616 p4 = ell[5 + lskip3];
617 /* compute outer product and add it to the Z matrix */
618 Z11 += p1 * q1;
619 Z21 += p2 * q1;
620 Z31 += p3 * q1;
621 Z41 += p4 * q1;
622 /* load p and q values */
623 p1 = ell[6];
624 q1 = ex[6];
625 p2 = ell[6 + lskip1];
626 p3 = ell[6 + lskip2];
627 p4 = ell[6 + lskip3];
628 /* compute outer product and add it to the Z matrix */
629 Z11 += p1 * q1;
630 Z21 += p2 * q1;
631 Z31 += p3 * q1;
632 Z41 += p4 * q1;
633 /* load p and q values */
634 p1 = ell[7];
635 q1 = ex[7];
636 p2 = ell[7 + lskip1];
637 p3 = ell[7 + lskip2];
638 p4 = ell[7 + lskip3];
639 /* compute outer product and add it to the Z matrix */
640 Z11 += p1 * q1;
641 Z21 += p2 * q1;
642 Z31 += p3 * q1;
643 Z41 += p4 * q1;
644 /* load p and q values */
645 p1 = ell[8];
646 q1 = ex[8];
647 p2 = ell[8 + lskip1];
648 p3 = ell[8 + lskip2];
649 p4 = ell[8 + lskip3];
650 /* compute outer product and add it to the Z matrix */
651 Z11 += p1 * q1;
652 Z21 += p2 * q1;
653 Z31 += p3 * q1;
654 Z41 += p4 * q1;
655 /* load p and q values */
656 p1 = ell[9];
657 q1 = ex[9];
658 p2 = ell[9 + lskip1];
659 p3 = ell[9 + lskip2];
660 p4 = ell[9 + lskip3];
661 /* compute outer product and add it to the Z matrix */
662 Z11 += p1 * q1;
663 Z21 += p2 * q1;
664 Z31 += p3 * q1;
665 Z41 += p4 * q1;
666 /* load p and q values */
667 p1 = ell[10];
668 q1 = ex[10];
669 p2 = ell[10 + lskip1];
670 p3 = ell[10 + lskip2];
671 p4 = ell[10 + lskip3];
672 /* compute outer product and add it to the Z matrix */
673 Z11 += p1 * q1;
674 Z21 += p2 * q1;
675 Z31 += p3 * q1;
676 Z41 += p4 * q1;
677 /* load p and q values */
678 p1 = ell[11];
679 q1 = ex[11];
680 p2 = ell[11 + lskip1];
681 p3 = ell[11 + lskip2];
682 p4 = ell[11 + lskip3];
683 /* compute outer product and add it to the Z matrix */
684 Z11 += p1 * q1;
685 Z21 += p2 * q1;
686 Z31 += p3 * q1;
687 Z41 += p4 * q1;
688 /* advance pointers */
689 ell += 12;
690 ex += 12;
691 /* end of inner loop */
692 }
693 /* compute left-over iterations */
694 j += 12;
695 for (; j > 0; j--)
696 {
697 /* load p and q values */
698 p1 = ell[0];
699 q1 = ex[0];
700 p2 = ell[lskip1];
701 p3 = ell[lskip2];
702 p4 = ell[lskip3];
703 /* compute outer product and add it to the Z matrix */
704 Z11 += p1 * q1;
705 Z21 += p2 * q1;
706 Z31 += p3 * q1;
707 Z41 += p4 * q1;
708 /* advance pointers */
709 ell += 1;
710 ex += 1;
711 }
712 /* finish computing the X(i) block */
713 Z11 = ex[0] - Z11;
714 ex[0] = Z11;
715 p1 = ell[lskip1];
716 Z21 = ex[1] - Z21 - p1 * Z11;
717 ex[1] = Z21;
718 p1 = ell[lskip2];
719 p2 = ell[1 + lskip2];
720 Z31 = ex[2] - Z31 - p1 * Z11 - p2 * Z21;
721 ex[2] = Z31;
722 p1 = ell[lskip3];
723 p2 = ell[1 + lskip3];
724 p3 = ell[2 + lskip3];
725 Z41 = ex[3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
726 ex[3] = Z41;
727 /* end of outer loop */
728 }
729 /* compute rows at end that are not a multiple of block size */
730 for (; i < n; i++)
731 {
732 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
733 /* set the Z matrix to 0 */
734 Z11 = 0;
735 ell = L + i * lskip1;
736 ex = B;
737 /* the inner loop that computes outer products and adds them to Z */
738 for (j = i - 12; j >= 0; j -= 12)
739 {
740 /* load p and q values */
741 p1 = ell[0];
742 q1 = ex[0];
743 /* compute outer product and add it to the Z matrix */
744 Z11 += p1 * q1;
745 /* load p and q values */
746 p1 = ell[1];
747 q1 = ex[1];
748 /* compute outer product and add it to the Z matrix */
749 Z11 += p1 * q1;
750 /* load p and q values */
751 p1 = ell[2];
752 q1 = ex[2];
753 /* compute outer product and add it to the Z matrix */
754 Z11 += p1 * q1;
755 /* load p and q values */
756 p1 = ell[3];
757 q1 = ex[3];
758 /* compute outer product and add it to the Z matrix */
759 Z11 += p1 * q1;
760 /* load p and q values */
761 p1 = ell[4];
762 q1 = ex[4];
763 /* compute outer product and add it to the Z matrix */
764 Z11 += p1 * q1;
765 /* load p and q values */
766 p1 = ell[5];
767 q1 = ex[5];
768 /* compute outer product and add it to the Z matrix */
769 Z11 += p1 * q1;
770 /* load p and q values */
771 p1 = ell[6];
772 q1 = ex[6];
773 /* compute outer product and add it to the Z matrix */
774 Z11 += p1 * q1;
775 /* load p and q values */
776 p1 = ell[7];
777 q1 = ex[7];
778 /* compute outer product and add it to the Z matrix */
779 Z11 += p1 * q1;
780 /* load p and q values */
781 p1 = ell[8];
782 q1 = ex[8];
783 /* compute outer product and add it to the Z matrix */
784 Z11 += p1 * q1;
785 /* load p and q values */
786 p1 = ell[9];
787 q1 = ex[9];
788 /* compute outer product and add it to the Z matrix */
789 Z11 += p1 * q1;
790 /* load p and q values */
791 p1 = ell[10];
792 q1 = ex[10];
793 /* compute outer product and add it to the Z matrix */
794 Z11 += p1 * q1;
795 /* load p and q values */
796 p1 = ell[11];
797 q1 = ex[11];
798 /* compute outer product and add it to the Z matrix */
799 Z11 += p1 * q1;
800 /* advance pointers */
801 ell += 12;
802 ex += 12;
803 /* end of inner loop */
804 }
805 /* compute left-over iterations */
806 j += 12;
807 for (; j > 0; j--)
808 {
809 /* load p and q values */
810 p1 = ell[0];
811 q1 = ex[0];
812 /* compute outer product and add it to the Z matrix */
813 Z11 += p1 * q1;
814 /* advance pointers */
815 ell += 1;
816 ex += 1;
817 }
818 /* finish computing the X(i) block */
819 Z11 = ex[0] - Z11;
820 ex[0] = Z11;
821 }
822}
823
824/* solve L^T * x=b, with b containing 1 right hand side.
825 * L is an n*n lower triangular matrix with ones on the diagonal.
826 * L is stored by rows and its leading dimension is lskip.
827 * b is an n*1 matrix that contains the right hand side.
828 * b is overwritten with x.
829 * this processes blocks of 4.
830 */
831
832void btSolveL1T(const btScalar *L, btScalar *B, int n, int lskip1)
833{
834 /* declare variables - Z matrix, p and q vectors, etc */
835 btScalar Z11, m11, Z21, m21, Z31, m31, Z41, m41, p1, q1, p2, p3, p4, *ex;
836 const btScalar *ell;
837 int lskip2, i, j;
838 // int lskip3;
839 /* special handling for L and B because we're solving L1 *transpose* */
840 L = L + (n - 1) * (lskip1 + 1);
841 B = B + n - 1;
842 lskip1 = -lskip1;
843 /* compute lskip values */
844 lskip2 = 2 * lskip1;
845 //lskip3 = 3*lskip1;
846 /* compute all 4 x 1 blocks of X */
847 for (i = 0; i <= n - 4; i += 4)
848 {
849 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
850 /* set the Z matrix to 0 */
851 Z11 = 0;
852 Z21 = 0;
853 Z31 = 0;
854 Z41 = 0;
855 ell = L - i;
856 ex = B;
857 /* the inner loop that computes outer products and adds them to Z */
858 for (j = i - 4; j >= 0; j -= 4)
859 {
860 /* load p and q values */
861 p1 = ell[0];
862 q1 = ex[0];
863 p2 = ell[-1];
864 p3 = ell[-2];
865 p4 = ell[-3];
866 /* compute outer product and add it to the Z matrix */
867 m11 = p1 * q1;
868 m21 = p2 * q1;
869 m31 = p3 * q1;
870 m41 = p4 * q1;
871 ell += lskip1;
872 Z11 += m11;
873 Z21 += m21;
874 Z31 += m31;
875 Z41 += m41;
876 /* load p and q values */
877 p1 = ell[0];
878 q1 = ex[-1];
879 p2 = ell[-1];
880 p3 = ell[-2];
881 p4 = ell[-3];
882 /* compute outer product and add it to the Z matrix */
883 m11 = p1 * q1;
884 m21 = p2 * q1;
885 m31 = p3 * q1;
886 m41 = p4 * q1;
887 ell += lskip1;
888 Z11 += m11;
889 Z21 += m21;
890 Z31 += m31;
891 Z41 += m41;
892 /* load p and q values */
893 p1 = ell[0];
894 q1 = ex[-2];
895 p2 = ell[-1];
896 p3 = ell[-2];
897 p4 = ell[-3];
898 /* compute outer product and add it to the Z matrix */
899 m11 = p1 * q1;
900 m21 = p2 * q1;
901 m31 = p3 * q1;
902 m41 = p4 * q1;
903 ell += lskip1;
904 Z11 += m11;
905 Z21 += m21;
906 Z31 += m31;
907 Z41 += m41;
908 /* load p and q values */
909 p1 = ell[0];
910 q1 = ex[-3];
911 p2 = ell[-1];
912 p3 = ell[-2];
913 p4 = ell[-3];
914 /* compute outer product and add it to the Z matrix */
915 m11 = p1 * q1;
916 m21 = p2 * q1;
917 m31 = p3 * q1;
918 m41 = p4 * q1;
919 ell += lskip1;
920 ex -= 4;
921 Z11 += m11;
922 Z21 += m21;
923 Z31 += m31;
924 Z41 += m41;
925 /* end of inner loop */
926 }
927 /* compute left-over iterations */
928 j += 4;
929 for (; j > 0; j--)
930 {
931 /* load p and q values */
932 p1 = ell[0];
933 q1 = ex[0];
934 p2 = ell[-1];
935 p3 = ell[-2];
936 p4 = ell[-3];
937 /* compute outer product and add it to the Z matrix */
938 m11 = p1 * q1;
939 m21 = p2 * q1;
940 m31 = p3 * q1;
941 m41 = p4 * q1;
942 ell += lskip1;
943 ex -= 1;
944 Z11 += m11;
945 Z21 += m21;
946 Z31 += m31;
947 Z41 += m41;
948 }
949 /* finish computing the X(i) block */
950 Z11 = ex[0] - Z11;
951 ex[0] = Z11;
952 p1 = ell[-1];
953 Z21 = ex[-1] - Z21 - p1 * Z11;
954 ex[-1] = Z21;
955 p1 = ell[-2];
956 p2 = ell[-2 + lskip1];
957 Z31 = ex[-2] - Z31 - p1 * Z11 - p2 * Z21;
958 ex[-2] = Z31;
959 p1 = ell[-3];
960 p2 = ell[-3 + lskip1];
961 p3 = ell[-3 + lskip2];
962 Z41 = ex[-3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
963 ex[-3] = Z41;
964 /* end of outer loop */
965 }
966 /* compute rows at end that are not a multiple of block size */
967 for (; i < n; i++)
968 {
969 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
970 /* set the Z matrix to 0 */
971 Z11 = 0;
972 ell = L - i;
973 ex = B;
974 /* the inner loop that computes outer products and adds them to Z */
975 for (j = i - 4; j >= 0; j -= 4)
976 {
977 /* load p and q values */
978 p1 = ell[0];
979 q1 = ex[0];
980 /* compute outer product and add it to the Z matrix */
981 m11 = p1 * q1;
982 ell += lskip1;
983 Z11 += m11;
984 /* load p and q values */
985 p1 = ell[0];
986 q1 = ex[-1];
987 /* compute outer product and add it to the Z matrix */
988 m11 = p1 * q1;
989 ell += lskip1;
990 Z11 += m11;
991 /* load p and q values */
992 p1 = ell[0];
993 q1 = ex[-2];
994 /* compute outer product and add it to the Z matrix */
995 m11 = p1 * q1;
996 ell += lskip1;
997 Z11 += m11;
998 /* load p and q values */
999 p1 = ell[0];
1000 q1 = ex[-3];
1001 /* compute outer product and add it to the Z matrix */
1002 m11 = p1 * q1;
1003 ell += lskip1;
1004 ex -= 4;
1005 Z11 += m11;
1006 /* end of inner loop */
1007 }
1008 /* compute left-over iterations */
1009 j += 4;
1010 for (; j > 0; j--)
1011 {
1012 /* load p and q values */
1013 p1 = ell[0];
1014 q1 = ex[0];
1015 /* compute outer product and add it to the Z matrix */
1016 m11 = p1 * q1;
1017 ell += lskip1;
1018 ex -= 1;
1019 Z11 += m11;
1020 }
1021 /* finish computing the X(i) block */
1022 Z11 = ex[0] - Z11;
1023 ex[0] = Z11;
1024 }
1025}
1026
1027void btVectorScale(btScalar *a, const btScalar *d, int n)
1028{
1029 btAssert(a && d && n >= 0);
1030 for (int i = 0; i < n; i++)
1031 {
1032 a[i] *= d[i];
1033 }
1034}
1035
1036void btSolveLDLT(const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
1037{
1038 btAssert(L && d && b && n > 0 && nskip >= n);
1039 btSolveL1(L, b, n, nskip);
1040 btVectorScale(b, d, n);
1041 btSolveL1T(L, b, n, nskip);
1042}
1043
1044//***************************************************************************
1045
1046// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
1047// A is nskip. this only references and swaps the lower triangle.
1048// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
1049// rows will be swapped by exchanging row pointers. otherwise the data will
1050// be copied.
1051
1052static void btSwapRowsAndCols(BTATYPE A, int n, int i1, int i2, int nskip,
1053 int do_fast_row_swaps)
1054{
1055 btAssert(A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
1056 nskip >= n && i1 < i2);
1057
1058#ifdef BTROWPTRS
1059 btScalar *A_i1 = A[i1];
1060 btScalar *A_i2 = A[i2];
1061 for (int i = i1 + 1; i < i2; ++i)
1062 {
1063 btScalar *A_i_i1 = A[i] + i1;
1064 A_i1[i] = *A_i_i1;
1065 *A_i_i1 = A_i2[i];
1066 }
1067 A_i1[i2] = A_i1[i1];
1068 A_i1[i1] = A_i2[i1];
1069 A_i2[i1] = A_i2[i2];
1070 // swap rows, by swapping row pointers
1071 if (do_fast_row_swaps)
1072 {
1073 A[i1] = A_i2;
1074 A[i2] = A_i1;
1075 }
1076 else
1077 {
1078 // Only swap till i2 column to match A plain storage variant.
1079 for (int k = 0; k <= i2; ++k)
1080 {
1081 btScalar tmp = A_i1[k];
1082 A_i1[k] = A_i2[k];
1083 A_i2[k] = tmp;
1084 }
1085 }
1086 // swap columns the hard way
1087 for (int j = i2 + 1; j < n; ++j)
1088 {
1089 btScalar *A_j = A[j];
1090 btScalar tmp = A_j[i1];
1091 A_j[i1] = A_j[i2];
1092 A_j[i2] = tmp;
1093 }
1094#else
1095 btScalar *A_i1 = A + i1 * nskip;
1096 btScalar *A_i2 = A + i2 * nskip;
1097 for (int k = 0; k < i1; ++k)
1098 {
1099 btScalar tmp = A_i1[k];
1100 A_i1[k] = A_i2[k];
1101 A_i2[k] = tmp;
1102 }
1103 btScalar *A_i = A_i1 + nskip;
1104 for (int i = i1 + 1; i < i2; A_i += nskip, ++i)
1105 {
1106 btScalar tmp = A_i2[i];
1107 A_i2[i] = A_i[i1];
1108 A_i[i1] = tmp;
1109 }
1110 {
1111 btScalar tmp = A_i1[i1];
1112 A_i1[i1] = A_i2[i2];
1113 A_i2[i2] = tmp;
1114 }
1115 btScalar *A_j = A_i2 + nskip;
1116 for (int j = i2 + 1; j < n; A_j += nskip, ++j)
1117 {
1118 btScalar tmp = A_j[i1];
1119 A_j[i1] = A_j[i2];
1120 A_j[i2] = tmp;
1121 }
1122#endif
1123}
1124
1125// swap two indexes in the n*n LCP problem. i1 must be <= i2.
1126
1128 btScalar *hi, int *p, bool *state, int *findex,
1129 int n, int i1, int i2, int nskip,
1130 int do_fast_row_swaps)
1131{
1132 btScalar tmpr;
1133 int tmpi;
1134 bool tmpb;
1135 btAssert(n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
1136 if (i1 == i2) return;
1137
1138 btSwapRowsAndCols(A, n, i1, i2, nskip, do_fast_row_swaps);
1139
1140 tmpr = x[i1];
1141 x[i1] = x[i2];
1142 x[i2] = tmpr;
1143
1144 tmpr = b[i1];
1145 b[i1] = b[i2];
1146 b[i2] = tmpr;
1147
1148 tmpr = w[i1];
1149 w[i1] = w[i2];
1150 w[i2] = tmpr;
1151
1152 tmpr = lo[i1];
1153 lo[i1] = lo[i2];
1154 lo[i2] = tmpr;
1155
1156 tmpr = hi[i1];
1157 hi[i1] = hi[i2];
1158 hi[i2] = tmpr;
1159
1160 tmpi = p[i1];
1161 p[i1] = p[i2];
1162 p[i2] = tmpi;
1163
1164 tmpb = state[i1];
1165 state[i1] = state[i2];
1166 state[i2] = tmpb;
1167
1168 if (findex)
1169 {
1170 tmpi = findex[i1];
1171 findex[i1] = findex[i2];
1172 findex[i2] = tmpi;
1173 }
1174}
1175
1176//***************************************************************************
1177// btLCP manipulator object. this represents an n*n LCP problem.
1178//
1179// two index sets C and N are kept. each set holds a subset of
1180// the variable indexes 0..n-1. an index can only be in one set.
1181// initially both sets are empty.
1182//
1183// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
1184
1185//***************************************************************************
1186// fast implementation of btLCP. see the above definition of btLCP for
1187// interface comments.
1188//
1189// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
1190// permuted as the other vectors/matrices are permuted.
1191//
1192// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
1193// contiguous indexes. the don't-care indexes follow N.
1194//
1195// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
1196// added or removed from the set C the factorization is updated.
1197// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
1198// the leading dimension of the matrix L is always `nskip'.
1199//
1200// at the start there may be other indexes that are unbounded but are not
1201// included in `nub'. btLCP will permute the matrix so that absolutely all
1202// unbounded vectors are at the start. thus there may be some initial
1203// permutation.
1204//
1205// the algorithms here assume certain patterns, particularly with respect to
1206// index transfer.
1207
1208#ifdef btLCP_FAST
1209
1210struct btLCP
1211{
1212 const int m_n;
1213 const int m_nskip;
1215 int m_nC, m_nN; // size of each index set
1216 BTATYPE const m_A; // A rows
1217 btScalar *const m_x, *const m_b, *const m_w, *const m_lo, *const m_hi; // permuted LCP problem data
1218 btScalar *const m_L, *const m_d; // L*D*L' factorization of set C
1219 btScalar *const m_Dell, *const m_ell, *const m_tmp;
1220 bool *const m_state;
1221 int *const m_findex, *const m_p, *const m_C;
1222
1223 btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1224 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1225 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1226 bool *_state, int *_findex, int *p, int *c, btScalar **Arows);
1227 int getNub() const { return m_nub; }
1228 void transfer_i_to_C(int i);
1229 void transfer_i_to_N(int i) { m_nN++; } // because we can assume C and N span 1:i-1
1230 void transfer_i_from_N_to_C(int i);
1232 int numC() const { return m_nC; }
1233 int numN() const { return m_nN; }
1234 int indexC(int i) const { return i; }
1235 int indexN(int i) const { return i + m_nC; }
1236 btScalar Aii(int i) const { return BTAROW(i)[i]; }
1237 btScalar AiC_times_qC(int i, btScalar *q) const { return btLargeDot(BTAROW(i), q, m_nC); }
1238 btScalar AiN_times_qN(int i, btScalar *q) const { return btLargeDot(BTAROW(i) + m_nC, q + m_nC, m_nN); }
1240 void pN_plusequals_ANi(btScalar *p, int i, int sign = 1);
1243 void solve1(btScalar *a, int i, int dir = 1, int only_transfer = 0);
1244 void unpermute();
1245};
1246
1247btLCP::btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1248 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1249 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1250 bool *_state, int *_findex, int *p, int *c, btScalar **Arows) : m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
1251#ifdef BTROWPTRS
1252 m_A(Arows),
1253#else
1254 m_A(_Adata),
1255#endif
1256 m_x(_x),
1257 m_b(_b),
1258 m_w(_w),
1259 m_lo(_lo),
1260 m_hi(_hi),
1261 m_L(l),
1262 m_d(_d),
1263 m_Dell(_Dell),
1264 m_ell(_ell),
1265 m_tmp(_tmp),
1266 m_state(_state),
1267 m_findex(_findex),
1268 m_p(p),
1269 m_C(c)
1270{
1271 {
1272 btSetZero(m_x, m_n);
1273 }
1274
1275 {
1276#ifdef BTROWPTRS
1277 // make matrix row pointers
1278 btScalar *aptr = _Adata;
1279 BTATYPE A = m_A;
1280 const int n = m_n, nskip = m_nskip;
1281 for (int k = 0; k < n; aptr += nskip, ++k) A[k] = aptr;
1282#endif
1283 }
1284
1285 {
1286 int *p = m_p;
1287 const int n = m_n;
1288 for (int k = 0; k < n; ++k) p[k] = k; // initially unpermuted
1289 }
1290
1291 /*
1292 // for testing, we can do some random swaps in the area i > nub
1293 {
1294 const int n = m_n;
1295 const int nub = m_nub;
1296 if (nub < n) {
1297 for (int k=0; k<100; k++) {
1298 int i1,i2;
1299 do {
1300 i1 = dRandInt(n-nub)+nub;
1301 i2 = dRandInt(n-nub)+nub;
1302 }
1303 while (i1 > i2);
1304 //printf ("--> %d %d\n",i1,i2);
1305 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0);
1306 }
1307 }
1308 */
1309
1310 // permute the problem so that *all* the unbounded variables are at the
1311 // start, i.e. look for unbounded variables not included in `nub'. we can
1312 // potentially push up `nub' this way and get a bigger initial factorization.
1313 // note that when we swap rows/cols here we must not just swap row pointers,
1314 // as the initial factorization relies on the data being all in one chunk.
1315 // variables that have findex >= 0 are *not* considered to be unbounded even
1316 // if lo=-inf and hi=inf - this is because these limits may change during the
1317 // solution process.
1318
1319 {
1320 int *findex = m_findex;
1321 btScalar *lo = m_lo, *hi = m_hi;
1322 const int n = m_n;
1323 for (int k = m_nub; k < n; ++k)
1324 {
1325 if (findex && findex[k] >= 0) continue;
1326 if (lo[k] == -BT_INFINITY && hi[k] == BT_INFINITY)
1327 {
1328 btSwapProblem(m_A, m_x, m_b, m_w, lo, hi, m_p, m_state, findex, n, m_nub, k, m_nskip, 0);
1329 m_nub++;
1330 }
1331 }
1332 }
1333
1334 // if there are unbounded variables at the start, factorize A up to that
1335 // point and solve for x. this puts all indexes 0..nub-1 into C.
1336 if (m_nub > 0)
1337 {
1338 const int nub = m_nub;
1339 {
1340 btScalar *Lrow = m_L;
1341 const int nskip = m_nskip;
1342 for (int j = 0; j < nub; Lrow += nskip, ++j) memcpy(Lrow, BTAROW(j), (j + 1) * sizeof(btScalar));
1343 }
1344 btFactorLDLT(m_L, m_d, nub, m_nskip);
1345 memcpy(m_x, m_b, nub * sizeof(btScalar));
1346 btSolveLDLT(m_L, m_d, m_x, nub, m_nskip);
1347 btSetZero(m_w, nub);
1348 {
1349 int *C = m_C;
1350 for (int k = 0; k < nub; ++k) C[k] = k;
1351 }
1352 m_nC = nub;
1353 }
1354
1355 // permute the indexes > nub such that all findex variables are at the end
1356 if (m_findex)
1357 {
1358 const int nub = m_nub;
1359 int *findex = m_findex;
1360 int num_at_end = 0;
1361 for (int k = m_n - 1; k >= nub; k--)
1362 {
1363 if (findex[k] >= 0)
1364 {
1365 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, findex, m_n, k, m_n - 1 - num_at_end, m_nskip, 1);
1366 num_at_end++;
1367 }
1368 }
1369 }
1370
1371 // print info about indexes
1372 /*
1373 {
1374 const int n = m_n;
1375 const int nub = m_nub;
1376 for (int k=0; k<n; k++) {
1377 if (k<nub) printf ("C");
1378 else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c");
1379 else printf (".");
1380 }
1381 printf ("\n");
1382 }
1383 */
1384}
1385
1387{
1388 {
1389 if (m_nC > 0)
1390 {
1391 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
1392 {
1393 const int nC = m_nC;
1394 btScalar *const Ltgt = m_L + nC * m_nskip, *ell = m_ell;
1395 for (int j = 0; j < nC; ++j) Ltgt[j] = ell[j];
1396 }
1397 const int nC = m_nC;
1398 m_d[nC] = btRecip(BTAROW(i)[i] - btLargeDot(m_ell, m_Dell, nC));
1399 }
1400 else
1401 {
1402 m_d[0] = btRecip(BTAROW(i)[i]);
1403 }
1404
1406
1407 const int nC = m_nC;
1408 m_C[nC] = nC;
1409 m_nC = nC + 1; // nC value is outdated after this line
1410 }
1411}
1412
1414{
1415 {
1416 if (m_nC > 0)
1417 {
1418 {
1419 btScalar *const aptr = BTAROW(i);
1420 btScalar *Dell = m_Dell;
1421 const int *C = m_C;
1422#ifdef BTNUB_OPTIMIZATIONS
1423 // if nub>0, initial part of aptr unpermuted
1424 const int nub = m_nub;
1425 int j = 0;
1426 for (; j < nub; ++j) Dell[j] = aptr[j];
1427 const int nC = m_nC;
1428 for (; j < nC; ++j) Dell[j] = aptr[C[j]];
1429#else
1430 const int nC = m_nC;
1431 for (int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
1432#endif
1433 }
1435 {
1436 const int nC = m_nC;
1437 btScalar *const Ltgt = m_L + nC * m_nskip;
1438 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1439 for (int j = 0; j < nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
1440 }
1441 const int nC = m_nC;
1442 m_d[nC] = btRecip(BTAROW(i)[i] - btLargeDot(m_ell, m_Dell, nC));
1443 }
1444 else
1445 {
1446 m_d[0] = btRecip(BTAROW(i)[i]);
1447 }
1448
1450
1451 const int nC = m_nC;
1452 m_C[nC] = nC;
1453 m_nN--;
1454 m_nC = nC + 1; // nC value is outdated after this line
1455 }
1456
1457 // @@@ TO DO LATER
1458 // if we just finish here then we'll go back and re-solve for
1459 // delta_x. but actually we can be more efficient and incrementally
1460 // update delta_x here. but if we do this, we wont have ell and Dell
1461 // to use in updating the factorization later.
1462}
1463
1464void btRemoveRowCol(btScalar *A, int n, int nskip, int r)
1465{
1466 btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
1467 if (r >= n - 1) return;
1468 if (r > 0)
1469 {
1470 {
1471 const size_t move_size = (n - r - 1) * sizeof(btScalar);
1472 btScalar *Adst = A + r;
1473 for (int i = 0; i < r; Adst += nskip, ++i)
1474 {
1475 btScalar *Asrc = Adst + 1;
1476 memmove(Adst, Asrc, move_size);
1477 }
1478 }
1479 {
1480 const size_t cpy_size = r * sizeof(btScalar);
1481 btScalar *Adst = A + r * nskip;
1482 for (int i = r; i < (n - 1); ++i)
1483 {
1484 btScalar *Asrc = Adst + nskip;
1485 memcpy(Adst, Asrc, cpy_size);
1486 Adst = Asrc;
1487 }
1488 }
1489 }
1490 {
1491 const size_t cpy_size = (n - r - 1) * sizeof(btScalar);
1492 btScalar *Adst = A + r * (nskip + 1);
1493 for (int i = r; i < (n - 1); ++i)
1494 {
1495 btScalar *Asrc = Adst + (nskip + 1);
1496 memcpy(Adst, Asrc, cpy_size);
1497 Adst = Asrc - 1;
1498 }
1499 }
1500}
1501
1502void btLDLTAddTL(btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar> &scratch)
1503{
1504 btAssert(L && d && a && n > 0 && nskip >= n);
1505
1506 if (n < 2) return;
1507 scratch.resize(2 * nskip);
1508 btScalar *W1 = &scratch[0];
1509
1510 btScalar *W2 = W1 + nskip;
1511
1512 W1[0] = btScalar(0.0);
1513 W2[0] = btScalar(0.0);
1514 for (int j = 1; j < n; ++j)
1515 {
1516 W1[j] = W2[j] = (btScalar)(a[j] * SIMDSQRT12);
1517 }
1518 btScalar W11 = (btScalar)((btScalar(0.5) * a[0] + 1) * SIMDSQRT12);
1519 btScalar W21 = (btScalar)((btScalar(0.5) * a[0] - 1) * SIMDSQRT12);
1520
1521 btScalar alpha1 = btScalar(1.0);
1522 btScalar alpha2 = btScalar(1.0);
1523
1524 {
1525 btScalar dee = d[0];
1526 btScalar alphanew = alpha1 + (W11 * W11) * dee;
1527 btAssert(alphanew != btScalar(0.0));
1528 dee /= alphanew;
1529 btScalar gamma1 = W11 * dee;
1530 dee *= alpha1;
1531 alpha1 = alphanew;
1532 alphanew = alpha2 - (W21 * W21) * dee;
1533 dee /= alphanew;
1534 //btScalar gamma2 = W21 * dee;
1535 alpha2 = alphanew;
1536 btScalar k1 = btScalar(1.0) - W21 * gamma1;
1537 btScalar k2 = W21 * gamma1 * W11 - W21;
1538 btScalar *ll = L + nskip;
1539 for (int p = 1; p < n; ll += nskip, ++p)
1540 {
1541 btScalar Wp = W1[p];
1542 btScalar ell = *ll;
1543 W1[p] = Wp - W11 * ell;
1544 W2[p] = k1 * Wp + k2 * ell;
1545 }
1546 }
1547
1548 btScalar *ll = L + (nskip + 1);
1549 for (int j = 1; j < n; ll += nskip + 1, ++j)
1550 {
1551 btScalar k1 = W1[j];
1552 btScalar k2 = W2[j];
1553
1554 btScalar dee = d[j];
1555 btScalar alphanew = alpha1 + (k1 * k1) * dee;
1556 btAssert(alphanew != btScalar(0.0));
1557 dee /= alphanew;
1558 btScalar gamma1 = k1 * dee;
1559 dee *= alpha1;
1560 alpha1 = alphanew;
1561 alphanew = alpha2 - (k2 * k2) * dee;
1562 dee /= alphanew;
1563 btScalar gamma2 = k2 * dee;
1564 dee *= alpha2;
1565 d[j] = dee;
1566 alpha2 = alphanew;
1567
1568 btScalar *l = ll + nskip;
1569 for (int p = j + 1; p < n; l += nskip, ++p)
1570 {
1571 btScalar ell = *l;
1572 btScalar Wp = W1[p] - k1 * ell;
1573 ell += gamma1 * Wp;
1574 W1[p] = Wp;
1575 Wp = W2[p] - k2 * ell;
1576 ell -= gamma2 * Wp;
1577 W2[p] = Wp;
1578 *l = ell;
1579 }
1580 }
1581}
1582
1583#define _BTGETA(i, j) (A[i][j])
1584//#define _GETA(i,j) (A[(i)*nskip+(j)])
1585#define BTGETA(i, j) ((i > j) ? _BTGETA(i, j) : _BTGETA(j, i))
1586
1587inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
1588{
1589 return nskip * 2 * sizeof(btScalar);
1590}
1591
1592void btLDLTRemove(btScalar **A, const int *p, btScalar *L, btScalar *d,
1593 int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar> &scratch)
1594{
1595 btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
1596 n1 >= n2 && nskip >= n1);
1597#ifdef BT_DEBUG
1598 for (int i = 0; i < n2; ++i)
1599 btAssert(p[i] >= 0 && p[i] < n1);
1600#endif
1601
1602 if (r == n2 - 1)
1603 {
1604 return; // deleting last row/col is easy
1605 }
1606 else
1607 {
1608 size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip);
1609 btAssert(LDLTAddTL_size % sizeof(btScalar) == 0);
1610 scratch.resize(nskip * 2 + n2);
1611 btScalar *tmp = &scratch[0];
1612 if (r == 0)
1613 {
1614 btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size);
1615 const int p_0 = p[0];
1616 for (int i = 0; i < n2; ++i)
1617 {
1618 a[i] = -BTGETA(p[i], p_0);
1619 }
1620 a[0] += btScalar(1.0);
1621 btLDLTAddTL(L, d, a, n2, nskip, scratch);
1622 }
1623 else
1624 {
1625 btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size);
1626 {
1627 btScalar *Lcurr = L + r * nskip;
1628 for (int i = 0; i < r; ++Lcurr, ++i)
1629 {
1630 btAssert(d[i] != btScalar(0.0));
1631 t[i] = *Lcurr / d[i];
1632 }
1633 }
1634 btScalar *a = t + r;
1635 {
1636 btScalar *Lcurr = L + r * nskip;
1637 const int *pp_r = p + r, p_r = *pp_r;
1638 const int n2_minus_r = n2 - r;
1639 for (int i = 0; i < n2_minus_r; Lcurr += nskip, ++i)
1640 {
1641 a[i] = btLargeDot(Lcurr, t, r) - BTGETA(pp_r[i], p_r);
1642 }
1643 }
1644 a[0] += btScalar(1.0);
1645 btLDLTAddTL(L + r * nskip + r, d + r, a, n2 - r, nskip, scratch);
1646 }
1647 }
1648
1649 // snip out row/column r from L and d
1650 btRemoveRowCol(L, n2, nskip, r);
1651 if (r < (n2 - 1)) memmove(d + r, d + r + 1, (n2 - r - 1) * sizeof(btScalar));
1652}
1653
1655{
1656 {
1657 int *C = m_C;
1658 // remove a row/column from the factorization, and adjust the
1659 // indexes (black magic!)
1660 int last_idx = -1;
1661 const int nC = m_nC;
1662 int j = 0;
1663 for (; j < nC; ++j)
1664 {
1665 if (C[j] == nC - 1)
1666 {
1667 last_idx = j;
1668 }
1669 if (C[j] == i)
1670 {
1671 btLDLTRemove(m_A, C, m_L, m_d, m_n, nC, j, m_nskip, scratch);
1672 int k;
1673 if (last_idx == -1)
1674 {
1675 for (k = j + 1; k < nC; ++k)
1676 {
1677 if (C[k] == nC - 1)
1678 {
1679 break;
1680 }
1681 }
1682 btAssert(k < nC);
1683 }
1684 else
1685 {
1686 k = last_idx;
1687 }
1688 C[k] = C[j];
1689 if (j < (nC - 1)) memmove(C + j, C + j + 1, (nC - j - 1) * sizeof(int));
1690 break;
1691 }
1692 }
1693 btAssert(j < nC);
1694
1695 btSwapProblem(m_A, m_x, m_b, m_w, m_lo, m_hi, m_p, m_state, m_findex, m_n, i, nC - 1, m_nskip, 1);
1696
1697 m_nN++;
1698 m_nC = nC - 1; // nC value is outdated after this line
1699 }
1700}
1701
1703{
1704 // we could try to make this matrix-vector multiplication faster using
1705 // outer product matrix tricks, e.g. with the dMultidotX() functions.
1706 // but i tried it and it actually made things slower on random 100x100
1707 // problems because of the overhead involved. so we'll stick with the
1708 // simple method for now.
1709 const int nC = m_nC;
1710 btScalar *ptgt = p + nC;
1711 const int nN = m_nN;
1712 for (int i = 0; i < nN; ++i)
1713 {
1714 ptgt[i] = btLargeDot(BTAROW(i + nC), q, nC);
1715 }
1716}
1717
1718void btLCP::pN_plusequals_ANi(btScalar *p, int i, int sign)
1719{
1720 const int nC = m_nC;
1721 btScalar *aptr = BTAROW(i) + nC;
1722 btScalar *ptgt = p + nC;
1723 if (sign > 0)
1724 {
1725 const int nN = m_nN;
1726 for (int j = 0; j < nN; ++j) ptgt[j] += aptr[j];
1727 }
1728 else
1729 {
1730 const int nN = m_nN;
1731 for (int j = 0; j < nN; ++j) ptgt[j] -= aptr[j];
1732 }
1733}
1734
1736{
1737 const int nC = m_nC;
1738 for (int i = 0; i < nC; ++i)
1739 {
1740 p[i] += s * q[i];
1741 }
1742}
1743
1745{
1746 const int nC = m_nC;
1747 btScalar *ptgt = p + nC, *qsrc = q + nC;
1748 const int nN = m_nN;
1749 for (int i = 0; i < nN; ++i)
1750 {
1751 ptgt[i] += s * qsrc[i];
1752 }
1753}
1754
1755void btLCP::solve1(btScalar *a, int i, int dir, int only_transfer)
1756{
1757 // the `Dell' and `ell' that are computed here are saved. if index i is
1758 // later added to the factorization then they can be reused.
1759 //
1760 // @@@ question: do we need to solve for entire delta_x??? yes, but
1761 // only if an x goes below 0 during the step.
1762
1763 if (m_nC > 0)
1764 {
1765 {
1766 btScalar *Dell = m_Dell;
1767 int *C = m_C;
1768 btScalar *aptr = BTAROW(i);
1769#ifdef BTNUB_OPTIMIZATIONS
1770 // if nub>0, initial part of aptr[] is guaranteed unpermuted
1771 const int nub = m_nub;
1772 int j = 0;
1773 for (; j < nub; ++j) Dell[j] = aptr[j];
1774 const int nC = m_nC;
1775 for (; j < nC; ++j) Dell[j] = aptr[C[j]];
1776#else
1777 const int nC = m_nC;
1778 for (int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
1779#endif
1780 }
1782 {
1783 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1784 const int nC = m_nC;
1785 for (int j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
1786 }
1787
1788 if (!only_transfer)
1789 {
1790 btScalar *tmp = m_tmp, *ell = m_ell;
1791 {
1792 const int nC = m_nC;
1793 for (int j = 0; j < nC; ++j) tmp[j] = ell[j];
1794 }
1795 btSolveL1T(m_L, tmp, m_nC, m_nskip);
1796 if (dir > 0)
1797 {
1798 int *C = m_C;
1799 btScalar *tmp = m_tmp;
1800 const int nC = m_nC;
1801 for (int j = 0; j < nC; ++j) a[C[j]] = -tmp[j];
1802 }
1803 else
1804 {
1805 int *C = m_C;
1806 btScalar *tmp = m_tmp;
1807 const int nC = m_nC;
1808 for (int j = 0; j < nC; ++j) a[C[j]] = tmp[j];
1809 }
1810 }
1811 }
1812}
1813
1815{
1816 // now we have to un-permute x and w
1817 {
1818 memcpy(m_tmp, m_x, m_n * sizeof(btScalar));
1819 btScalar *x = m_x, *tmp = m_tmp;
1820 const int *p = m_p;
1821 const int n = m_n;
1822 for (int j = 0; j < n; ++j) x[p[j]] = tmp[j];
1823 }
1824 {
1825 memcpy(m_tmp, m_w, m_n * sizeof(btScalar));
1826 btScalar *w = m_w, *tmp = m_tmp;
1827 const int *p = m_p;
1828 const int n = m_n;
1829 for (int j = 0; j < n; ++j) w[p[j]] = tmp[j];
1830 }
1831}
1832
1833#endif // btLCP_FAST
1834
1835//***************************************************************************
1836// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1837
1839 btScalar *outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory &scratchMem)
1840{
1841 s_error = false;
1842
1843 // printf("btSolveDantzigLCP n=%d\n",n);
1844 btAssert(n > 0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
1845 btAssert(outer_w);
1846
1847#ifdef BT_DEBUG
1848 {
1849 // check restrictions on lo and hi
1850 for (int k = 0; k < n; ++k)
1851 btAssert(lo[k] <= 0 && hi[k] >= 0);
1852 }
1853#endif
1854
1855 // if all the variables are unbounded then we can just factor, solve,
1856 // and return
1857 if (nub >= n)
1858 {
1859 int nskip = (n);
1860 btFactorLDLT(A, outer_w, n, nskip);
1861 btSolveLDLT(A, outer_w, b, n, nskip);
1862 memcpy(x, b, n * sizeof(btScalar));
1863
1864 return !s_error;
1865 }
1866
1867 const int nskip = (n);
1868 scratchMem.L.resize(n * nskip);
1869
1870 scratchMem.d.resize(n);
1871
1872 btScalar *w = outer_w;
1873 scratchMem.delta_w.resize(n);
1874 scratchMem.delta_x.resize(n);
1875 scratchMem.Dell.resize(n);
1876 scratchMem.ell.resize(n);
1877 scratchMem.Arows.resize(n);
1878 scratchMem.p.resize(n);
1879 scratchMem.C.resize(n);
1880
1881 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1882 scratchMem.state.resize(n);
1883
1884 // create LCP object. note that tmp is set to delta_w to save space, this
1885 // optimization relies on knowledge of how tmp is used, so be careful!
1886 btLCP lcp(n, nskip, nub, A, x, b, w, lo, hi, &scratchMem.L[0], &scratchMem.d[0], &scratchMem.Dell[0], &scratchMem.ell[0], &scratchMem.delta_w[0], &scratchMem.state[0], findex, &scratchMem.p[0], &scratchMem.C[0], &scratchMem.Arows[0]);
1887 int adj_nub = lcp.getNub();
1888
1889 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
1890 // LCP conditions then i is added to the appropriate index set. otherwise
1891 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1892 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1893 // while driving x(i) we maintain the LCP conditions on the other variables
1894 // 0..i-1. we do this by watching out for other x(i),w(i) values going
1895 // outside the valid region, and then switching them between index sets
1896 // when that happens.
1897
1898 bool hit_first_friction_index = false;
1899 for (int i = adj_nub; i < n; ++i)
1900 {
1901 s_error = false;
1902 // the index i is the driving index and indexes i+1..n-1 are "dont care",
1903 // i.e. when we make changes to the system those x's will be zero and we
1904 // don't care what happens to those w's. in other words, we only consider
1905 // an (i+1)*(i+1) sub-problem of A*x=b+w.
1906
1907 // if we've hit the first friction index, we have to compute the lo and
1908 // hi values based on the values of x already computed. we have been
1909 // permuting the indexes, so the values stored in the findex vector are
1910 // no longer valid. thus we have to temporarily unpermute the x vector.
1911 // for the purposes of this computation, 0*infinity = 0 ... so if the
1912 // contact constraint's normal force is 0, there should be no tangential
1913 // force applied.
1914
1915 if (!hit_first_friction_index && findex && findex[i] >= 0)
1916 {
1917 // un-permute x into delta_w, which is not being used at the moment
1918 for (int j = 0; j < n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j];
1919
1920 // set lo and hi values
1921 for (int k = i; k < n; ++k)
1922 {
1923 btScalar wfk = scratchMem.delta_w[findex[k]];
1924 if (wfk == 0)
1925 {
1926 hi[k] = 0;
1927 lo[k] = 0;
1928 }
1929 else
1930 {
1931 hi[k] = btFabs(hi[k] * wfk);
1932 lo[k] = -hi[k];
1933 }
1934 }
1935 hit_first_friction_index = true;
1936 }
1937
1938 // thus far we have not even been computing the w values for indexes
1939 // greater than i, so compute w[i] now.
1940 w[i] = lcp.AiC_times_qC(i, x) + lcp.AiN_times_qN(i, x) - b[i];
1941
1942 // if lo=hi=0 (which can happen for tangential friction when normals are
1943 // 0) then the index will be assigned to set N with some state. however,
1944 // set C's line has zero size, so the index will always remain in set N.
1945 // with the "normal" switching logic, if w changed sign then the index
1946 // would have to switch to set C and then back to set N with an inverted
1947 // state. this is pointless, and also computationally expensive. to
1948 // prevent this from happening, we use the rule that indexes with lo=hi=0
1949 // will never be checked for set changes. this means that the state for
1950 // these indexes may be incorrect, but that doesn't matter.
1951
1952 // see if x(i),w(i) is in a valid region
1953 if (lo[i] == 0 && w[i] >= 0)
1954 {
1955 lcp.transfer_i_to_N(i);
1956 scratchMem.state[i] = false;
1957 }
1958 else if (hi[i] == 0 && w[i] <= 0)
1959 {
1960 lcp.transfer_i_to_N(i);
1961 scratchMem.state[i] = true;
1962 }
1963 else if (w[i] == 0)
1964 {
1965 // this is a degenerate case. by the time we get to this test we know
1966 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1967 // and similarly that hi > 0. this means that the line segment
1968 // corresponding to set C is at least finite in extent, and we are on it.
1969 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
1970 lcp.solve1(&scratchMem.delta_x[0], i, 0, 1);
1971
1972 lcp.transfer_i_to_C(i);
1973 }
1974 else
1975 {
1976 // we must push x(i) and w(i)
1977 for (;;)
1978 {
1979 int dir;
1980 btScalar dirf;
1981 // find direction to push on x(i)
1982 if (w[i] <= 0)
1983 {
1984 dir = 1;
1985 dirf = btScalar(1.0);
1986 }
1987 else
1988 {
1989 dir = -1;
1990 dirf = btScalar(-1.0);
1991 }
1992
1993 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1994 lcp.solve1(&scratchMem.delta_x[0], i, dir);
1995
1996 // note that delta_x[i] = dirf, but we wont bother to set it
1997
1998 // compute: delta_w = A*delta_x ... note we only care about
1999 // delta_w(N) and delta_w(i), the rest is ignored
2000 lcp.pN_equals_ANC_times_qC(&scratchMem.delta_w[0], &scratchMem.delta_x[0]);
2001 lcp.pN_plusequals_ANi(&scratchMem.delta_w[0], i, dir);
2002 scratchMem.delta_w[i] = lcp.AiC_times_qC(i, &scratchMem.delta_x[0]) + lcp.Aii(i) * dirf;
2003
2004 // find largest step we can take (size=s), either to drive x(i),w(i)
2005 // to the valid LCP region or to drive an already-valid variable
2006 // outside the valid region.
2007
2008 int cmd = 1; // index switching command
2009 int si = 0; // si = index to switch if cmd>3
2010 btScalar s = -w[i] / scratchMem.delta_w[i];
2011 if (dir > 0)
2012 {
2013 if (hi[i] < BT_INFINITY)
2014 {
2015 btScalar s2 = (hi[i] - x[i]) * dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
2016 if (s2 < s)
2017 {
2018 s = s2;
2019 cmd = 3;
2020 }
2021 }
2022 }
2023 else
2024 {
2025 if (lo[i] > -BT_INFINITY)
2026 {
2027 btScalar s2 = (lo[i] - x[i]) * dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
2028 if (s2 < s)
2029 {
2030 s = s2;
2031 cmd = 2;
2032 }
2033 }
2034 }
2035
2036 {
2037 const int numN = lcp.numN();
2038 for (int k = 0; k < numN; ++k)
2039 {
2040 const int indexN_k = lcp.indexN(k);
2041 if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0)
2042 {
2043 // don't bother checking if lo=hi=0
2044 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue;
2045 btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k];
2046 if (s2 < s)
2047 {
2048 s = s2;
2049 cmd = 4;
2050 si = indexN_k;
2051 }
2052 }
2053 }
2054 }
2055
2056 {
2057 const int numC = lcp.numC();
2058 for (int k = adj_nub; k < numC; ++k)
2059 {
2060 const int indexC_k = lcp.indexC(k);
2061 if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY)
2062 {
2063 btScalar s2 = (lo[indexC_k] - x[indexC_k]) / scratchMem.delta_x[indexC_k];
2064 if (s2 < s)
2065 {
2066 s = s2;
2067 cmd = 5;
2068 si = indexC_k;
2069 }
2070 }
2071 if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY)
2072 {
2073 btScalar s2 = (hi[indexC_k] - x[indexC_k]) / scratchMem.delta_x[indexC_k];
2074 if (s2 < s)
2075 {
2076 s = s2;
2077 cmd = 6;
2078 si = indexC_k;
2079 }
2080 }
2081 }
2082 }
2083
2084 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
2085 // "C->NL","C->NH"};
2086 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
2087
2088 // if s <= 0 then we've got a problem. if we just keep going then
2089 // we're going to get stuck in an infinite loop. instead, just cross
2090 // our fingers and exit with the current solution.
2091 if (s <= btScalar(0.0))
2092 {
2093 // printf("LCP internal error, s <= 0 (s=%.4e)",(double)s);
2094 if (i < n)
2095 {
2096 btSetZero(x + i, n - i);
2097 btSetZero(w + i, n - i);
2098 }
2099 s_error = true;
2100 break;
2101 }
2102
2103 // apply x = x + s * delta_x
2104 lcp.pC_plusequals_s_times_qC(x, s, &scratchMem.delta_x[0]);
2105 x[i] += s * dirf;
2106
2107 // apply w = w + s * delta_w
2108 lcp.pN_plusequals_s_times_qN(w, s, &scratchMem.delta_w[0]);
2109 w[i] += s * scratchMem.delta_w[i];
2110
2111 // void *tmpbuf;
2112 // switch indexes between sets if necessary
2113 switch (cmd)
2114 {
2115 case 1: // done
2116 w[i] = 0;
2117 lcp.transfer_i_to_C(i);
2118 break;
2119 case 2: // done
2120 x[i] = lo[i];
2121 scratchMem.state[i] = false;
2122 lcp.transfer_i_to_N(i);
2123 break;
2124 case 3: // done
2125 x[i] = hi[i];
2126 scratchMem.state[i] = true;
2127 lcp.transfer_i_to_N(i);
2128 break;
2129 case 4: // keep going
2130 w[si] = 0;
2131 lcp.transfer_i_from_N_to_C(si);
2132 break;
2133 case 5: // keep going
2134 x[si] = lo[si];
2135 scratchMem.state[si] = false;
2136 lcp.transfer_i_from_C_to_N(si, scratchMem.m_scratch);
2137 break;
2138 case 6: // keep going
2139 x[si] = hi[si];
2140 scratchMem.state[si] = true;
2141 lcp.transfer_i_from_C_to_N(si, scratchMem.m_scratch);
2142 break;
2143 }
2144
2145 if (cmd <= 3) break;
2146 } // for (;;)
2147 } // else
2148
2149 if (s_error)
2150 {
2151 break;
2152 }
2153 } // for (int i=adj_nub; i<n; ++i)
2154
2155 lcp.unpermute();
2156
2157 return !s_error;
2158}
void btLDLTAddTL(btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray< btScalar > &scratch)
static void btSolveL1_1(const btScalar *L, btScalar *B, int n, int lskip1)
void btLDLTRemove(btScalar **A, const int *p, btScalar *L, btScalar *d, int n1, int n2, int r, int nskip, btAlignedObjectArray< btScalar > &scratch)
#define BTAROW(i)
void btSolveL1(const btScalar *L, btScalar *B, int n, int lskip1)
void btSolveL1T(const btScalar *L, btScalar *B, int n, int lskip1)
static void btSwapRowsAndCols(BTATYPE A, int n, int i1, int i2, int nskip, int do_fast_row_swaps)
static void btSwapProblem(BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo, btScalar *hi, int *p, bool *state, int *findex, int n, int i1, int i2, int nskip, int do_fast_row_swaps)
bool btSolveDantzigLCP(int n, btScalar *A, btScalar *x, btScalar *b, btScalar *outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory &scratchMem)
#define BTATYPE
void btSolveLDLT(const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
bool s_error
size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
void btRemoveRowCol(btScalar *A, int n, int nskip, int r)
void btFactorLDLT(btScalar *A, btScalar *d, int n, int nskip1)
void btVectorScale(btScalar *a, const btScalar *d, int n)
#define BTGETA(i, j)
#define BTROWPTRS
static void btSolveL1_2(const btScalar *L, btScalar *B, int n, int lskip1)
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:314
void btSetZero(T *a, int n)
Definition: btScalar.h:739
#define SIMDSQRT12
Definition: btScalar.h:531
btScalar btFabs(btScalar x)
Definition: btScalar.h:497
#define BT_INFINITY
Definition: btScalar.h:405
#define btRecip(x)
Definition: btScalar.h:533
btScalar btLargeDot(const btScalar *a, const btScalar *b, int n)
Definition: btScalar.h:750
#define btAssert(x)
Definition: btScalar.h:153
static T sum(const btAlignedObjectArray< T > &items)
void resize(int newsize, const T &fillData=T())
btAlignedObjectArray< int > C
Definition: btDantzigLCP.h:65
btAlignedObjectArray< btScalar > delta_w
Definition: btDantzigLCP.h:59
btAlignedObjectArray< btScalar > delta_x
Definition: btDantzigLCP.h:60
btAlignedObjectArray< btScalar > ell
Definition: btDantzigLCP.h:62
btAlignedObjectArray< btScalar > Dell
Definition: btDantzigLCP.h:61
btAlignedObjectArray< btScalar > d
Definition: btDantzigLCP.h:58
btAlignedObjectArray< btScalar > L
Definition: btDantzigLCP.h:57
btAlignedObjectArray< bool > state
Definition: btDantzigLCP.h:66
btAlignedObjectArray< btScalar > m_scratch
Definition: btDantzigLCP.h:56
btAlignedObjectArray< int > p
Definition: btDantzigLCP.h:64
btAlignedObjectArray< btScalar * > Arows
Definition: btDantzigLCP.h:63
btScalar Aii(int i) const
int indexN(int i) const
bool *const m_state
int indexC(int i) const
void pN_equals_ANC_times_qC(btScalar *p, btScalar *q)
BTATYPE const m_A
btScalar *const *const *const *const m_lo
btScalar *const m_L
btScalar AiN_times_qN(int i, btScalar *q) const
int getNub() const
btScalar *const *const m_ell
int *const m_findex
void transfer_i_to_N(int i)
void unpermute()
int *const *const *const m_C
btScalar *const m_Dell
void pN_plusequals_s_times_qN(btScalar *p, btScalar s, btScalar *q)
btScalar *const *const m_d
void transfer_i_from_N_to_C(int i)
btScalar *const *const *const *const *const m_hi
int *const *const m_p
void solve1(btScalar *a, int i, int dir=1, int only_transfer=0)
int numN() const
btScalar *const *const *const m_tmp
int numC() const
void pC_plusequals_s_times_qC(btScalar *p, btScalar s, btScalar *q)
void pN_plusequals_ANi(btScalar *p, int i, int sign=1)
void transfer_i_to_C(int i)
const int m_n
btScalar *const *const m_b
btScalar AiC_times_qC(int i, btScalar *q) const
btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w, btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d, btScalar *_Dell, btScalar *_ell, btScalar *_tmp, bool *_state, int *_findex, int *p, int *c, btScalar **Arows)
const int m_nskip
btScalar *const m_x
btScalar *const *const *const m_w
void transfer_i_from_C_to_N(int i, btAlignedObjectArray< btScalar > &scratch)