#include <stdio.h>
#include <pthread.h>

#include <stdlib.h>
#include <omp.h>
#include <time.h>
#include "timer.h"

#define size 1000

// The arrays
int **a, **b, **c;

int number;

/*-----------------------------------------------------------------------------
 Function name: fill
 -------------------------------------------------------------------------------
 Preconditions:
	-a, b and c each point to size rows of size ints
 Postconditions:
	-Every slot of a and b holds a pseudo-random value in the range 0..99
	-Every slot of c is 0
 Algorithm:
	-Seed the generator from the current time
	-Walk the matrices row by row, writing a, then b, then c for each slot
 Exception/Error Handling:
	-None
 -----------------------------------------------------------------------------*/
void fill ( )
{
	// The generator is re-seeded on every call
	srand (time (NULL));

	for (int row=0; row<size; row++)
	{
		// Fetch the three row pointers once per row
		int *rowA = a[row];
		int *rowB = b[row];
		int *rowC = c[row];

		for (int col=0; col<size; col++)
		{
			// a is drawn before b for each slot
			rowA[col] = rand ()%100;
			rowB[col] = rand ()%100;
			rowC[col] = 0;
		}
	}
}

/*-----------------------------------------------------------------------------
 Function name: matMultMP
 -------------------------------------------------------------------------------
 Preconditions:
	-a, b and c are of the correct dimensions
	-number holds the requested thread count
 Postconditions:
	-c contains the multiplication of a x b
 Algorithm:
	-Disable dynamic thread adjustment and request number threads
	-Let OpenMP divide the rows of c among the threads
	-In row major order solve each slot with a per-slot accumulator
 Exception/Error Handling:
	-No locking is needed: the accumulator is local to each slot and the
		iterations of the parallel loop write to different rows of c
	-A non-positive number is not passed to omp_set_num_threads; the
		current OpenMP thread-count setting is left untouched instead
 -----------------------------------------------------------------------------*/
void matMultMP ( )
{
	omp_set_dynamic (0);

	// omp_set_num_threads requires a positive argument
	if (number > 0)
		omp_set_num_threads (number);

	// Only the row loop is parallel; opening a nested parallel region for
	// every slot would add overhead without adding useful parallelism
	#pragma omp parallel for
	for (int i=0; i<size; i++)
	{
		for (int j=0; j<size; j++)
		{
			// Declared inside the parallel loop so every thread has its own
			// accumulator; a variable declared outside would be shared and
			// raced on by all threads
			int value = 0;

			for (int k=0; k<size; k++)
			{
				value += a[i][k] * b[k][j];
			}

			c[i][j] = value;
		}
	}
}

/*-----------------------------------------------------------------------------
 Function name: matMult
 -------------------------------------------------------------------------------
 Preconditions:
	-a, b and c are of the correct dimensions
 Postconditions:
	-c contains the multiplication of a x b
 Algorithm:
	-In row major order solve each slot: start an accumulator at 0, add the
		products of row i of a with column j of b, store the sum in c
 Exception/Error Handling:
	-None
 -----------------------------------------------------------------------------*/
void matMult ( )
{
	for (int i=0; i<size; i++)
	{
		for (int j=0; j<size; j++)
		{
			// Must restart at 0 for every slot; otherwise each slot would
			// also contain the sums of all slots computed before it
			int value = 0;

			for (int k=0; k<size; k++)
			{
				value += a[i][k] * b[k][j];
			}

			c[i][j] = value;
		}
	}
}
/*-----------------------------------------------------------------------------
 Function name: main
 -------------------------------------------------------------------------------
 Preconditions:
	-None
 Postconditions:
	-All dynamic memory is cleaned up
	-Correct output is provided
 Algorithm:
	-Set all variables to a default
paddle::pad
	-Start the stopwatch
	-Create the appropriate number of threads
	-Stop the stopwatch after all threads have completed
	-Do output
 Exception/Error Handlig:
	-None
 -----------------------------------------------------------------------------*/
int main (int argc, char **argv)
{

	int i, j;
	timer watch;
    	double track = 0.0f;

    	a = new int * [size];
    	b = new int * [size];
    	c = new int * [size];

    	for (i=0; i<size; i++)
    	{
        	a[i] = new int [size];
        	b[i] = new int [size];
        	c[i] = new int [size];
    	}

    	i = 0;

    	number = i;

	// Sequential Sampling
	for (j=0; j<100; j++)
    	{

        	fill ( );

        	watch.start ( );

        	matMult ( );

        	watch.stop ( );
     
        	if (j > 10 && j < 90)
        		track += watch.read ();
    	}

    	printf("%i\t%.3f\n", number, track/80.0f);

	// Parallel for 1-16 threads
    	for (i=1; i<17; i++)
    	{
        	track = 0.0f;

        	number = i;

        	for (j=0; j<100; j++)
        	{
	        	fill ( );

	        	watch.start ();

          		matMultMP ( );

        		watch.stop ();

            		if (j > 10 && j < 90)
				track += watch.read ();
        	}

    		printf("%i\t%.3f\n", number, track/80.0f);
    	}

	// Cleanup
	for (i=0; i<size; i++)
    	{
        	delete [] a[i];
        	delete [] b[i];
        	delete [] c[i];
    	}

    	delete [] a;
    	delete [] b;
    	delete [] c;
}