Test_mindot.cpp - Copyright (c) 2011 Apple Inc. Init an arr…

/SDK/bullet/Test/Source/Tests/Test_mindot.cpp

https://bitbucket.org/wlitzlbauer/spacecrafts_old · C++ · 269 lines · 218 code · 41 blank · 10 comment · 45 complexity · 8fcd5bd8b8a1d512e206d7141a3c79ee MD5 · raw file

//
//  Test_mindot.cpp
//  BulletTest
//
//  Copyright (c) 2011 Apple Inc.
//



#include "LinearMath/btScalar.h"
#if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)


#include "Test_mindot.h"
#include "vector.h"
#include "Utils.h"
#include "main.h"
#include <math.h>
#include <string.h>

#include <LinearMath/btVector3.h>


// Reference code for testing purposes.
//
// Scalar implementation that Test_mindot compares btVector3::minDot against.
//   vertices  - array of `count` vertices, read as 4 floats each (only the
//               first three floats of each vertex enter the dot product)
//   vec       - the 3 floats to dot against every vertex
//   count     - number of vertices to examine
//   dotResult - receives the smallest dot product found
// Returns the index of the vertex with the smallest dot product.
static long mindot_ref(    const btSimdFloat4 *vertices, 
                       float *vec,
                       size_t count, 
                       float *dotResult );

// log2 of the largest vertex array exercised by the test.
// NOTE(review): both branches define the same value, so the __arm__ split
// currently has no effect.
#ifdef __arm__
    #define MAX_LOG2_SIZE   9
#else
    #define MAX_LOG2_SIZE   9
#endif
// Number of vertices allocated for the test array.
#define MAX_SIZE        (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls made inside each timed pass.
#define LOOPCOUNT 100

int Test_mindot(void)
{
    // Init an array flanked by guard pages
    btSimdFloat4 *data = (btSimdFloat4*) GuardCalloc( 1, MAX_SIZE * sizeof(btSimdFloat4), NULL );
    float *fp = (float*) data;
    long correct, test;
    btVector3 localScaling( 0.1f, 0.2f, 0.3f);
    size_t size;
    
    // Init the data
    size_t i;
    for( i = 0; i < MAX_SIZE; i++ )
    {
        fp[4*i] = (int32_t) RANDF_16;
        fp[4*i+1] = (int32_t) RANDF_16;
        fp[4*i+2] = (int32_t) RANDF_16;
        fp[4*i+3] = BT_NAN;     // w channel NaN
    }
    
    float correctDot, testDot;
    fp = (float*) localScaling;
	float maxRelativeError = 0.f;

    for( size = 1; size <= MAX_SIZE; size++ )
    {
        float *in = (float*)(data + MAX_SIZE - size);
        size_t position;
        
        for( position = 0; position < size; position++ )
        {
            float *biggest = in + position * 4;
            float old[4] = { biggest[0], biggest[1], biggest[2], biggest[3] };
            biggest[0] -= LARGE_FLOAT17;
            biggest[1] -= LARGE_FLOAT17;
            biggest[2] -= LARGE_FLOAT17;
            biggest[3] -= LARGE_FLOAT17;
            
            correctDot = BT_NAN;
            testDot = BT_NAN;
            correct = mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
            test = localScaling.minDot( (btVector3*) in, size, testDot);
            if( test < 0 || test >= size )
            {
                vlog( "Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
                continue;
            }
            if( correct != test )
			{
                vlog( "Error @ %ld: index misreported! *%ld vs %ld  (*%f, %f)\n", size, correct, test, 
                       fp[0] * in[4*correct] + fp[1] * in[4*correct+1]  + fp[2] * in[4*correct+2], 
                       fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2] );
				return 1;
			}
            if( test != position )
			{
                vlog( "Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test, 
                       fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2],
                       fp[0] * in[4*position] + fp[1] * in[4*position+1]  + fp[2] * in[4*position+2]  );
				return 1;
			}

            if( correctDot != testDot )
			{
				float relativeError = btFabs((testDot - correctDot) / correctDot);
				if (relativeError>1e6)
				{
					vlog( "Error @ %ld: dotpr misreported! *%f vs %f    (*%f, %f)\n", size, correctDot, testDot, 
						   fp[0] * in[4*correct] + fp[1] * in[4*correct+1]  + fp[2] * in[4*correct+2], 
						   fp[0] * in[4*test] + fp[1] * in[4*test+1]  + fp[2] * in[4*test+2]  );
					return 1;
				} else
				{
					if (maxRelativeError < relativeError)
					{
						maxRelativeError = relativeError;
					}
				}
			}

            
            memcpy( biggest, old, 16 );
        }
    }
    
	if (maxRelativeError)
	{
		printf("Warning: relative error = %e\n", maxRelativeError);
	}
    uint64_t scalarTimes[33 + (MAX_LOG2_SIZE-5)];
    uint64_t vectorTimes[33 + (MAX_LOG2_SIZE-5)];
    size_t j, k;
    float *in = (float*) data;
    for( size = 1; size <= 32; size++ )
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTimes[size] = 0;
        for (j = 0; j < 100; j++) {
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
                correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
            currentTime = ReadTicks() - startTime;
            scalarTimes[size] += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTimes[size] = bestTime;        
        else
            scalarTimes[size] /= 100;
    }
    
    uint64_t *timep = &scalarTimes[33];
    for( size = 64; size <= MAX_SIZE; size *= 2 )
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        timep[0] =0;
        for (j = 0; j < 100; j++) {
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
                correct += mindot_ref( (btSimdFloat4*) in, (float*) &localScaling, size, &correctDot);
            currentTime = ReadTicks() - startTime;
            timep[0] += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            timep[0] = bestTime;        
        else
            timep[0] /= 100;
        
        timep++;
    }
    
    for( size = 1; size <= 32; size++ )
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTimes[size] = 0;
        for (j = 0; j < 100; j++) {
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
                test += localScaling.minDot( (btVector3*) in, size, testDot);
            currentTime = ReadTicks() - startTime;
            vectorTimes[size] += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTimes[size] = bestTime;        
        else
            vectorTimes[size] /= 100;
    }
    
    timep = &vectorTimes[33];
    for( size = 64; size <= MAX_SIZE; size *= 2 )
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        timep[0] =0;
        for (j = 0; j < 100; j++) {
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
                test += localScaling.minDot( (btVector3*) in, size, testDot);
            currentTime = ReadTicks() - startTime;
            timep[0] += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            timep[0] = bestTime;        
        else
            timep[0] /= 100;
        
        timep++;
    }
    
    vlog( "Timing:\n" );
    vlog( " size\t    scalar\t    vector\n" );
    for( size = 1; size <= 32; size++ )
        vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[size] ) / LOOPCOUNT, TicksToCycles( vectorTimes[size] ) / LOOPCOUNT );
    size_t index = 33;
    for( size = 64; size <= MAX_SIZE; size *= 2 )
    {
        vlog( "%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles( scalarTimes[index] ) / LOOPCOUNT, TicksToCycles( vectorTimes[index] ) / LOOPCOUNT );
        index++;
    }
    
    // Useless check to make sure that the timing loops are not optimized away
    if( test != correct )
        vlog( "Error: Test != correct: *%ld vs. %ld\n", correct, test);
    
    GuardFree(data);
    
    return 0;
}



// Scalar reference: scans `count` vertices (4 floats apiece, only the first
// three used) and finds the one whose dot product with vec[0..2] is smallest.
// The comparison is strict, so the earliest vertex wins a tie. The winning
// dot product is written to *dotResult and its index is returned; if no dot
// product is below BT_INFINITY (e.g. count is 0) the result is -1 and
// *dotResult is BT_INFINITY.
static long mindot_ref(    const btSimdFloat4 *vertices, 
                       float *vec,
                       size_t count, 
                       float *dotResult )
{
    const float *vertex = (const float*) vertices;
    float smallest = BT_INFINITY;
    long smallestIndex = -1;
    long n = 0;

    while( n < count )
    {
        const float candidate = vec[0] * vertex[0] + vec[1] * vertex[1] + vec[2] * vertex[2];

        if( candidate < smallest )
        {
            smallestIndex = n;
            smallest = candidate;
        }

        vertex += 4;    // step over x, y, z and the unused w lane
        n++;
    }

    *dotResult = smallest;
    return smallestIndex;
}

#endif // BT_USE_SSE_IN_API || BT_USE_NEON