#include <stdio.h>
#include <pthread.h>

#include <stdlib.h>
#include <time.h>
#include <cmath>
#include "timer.h"

// Number of ints in the shared array: fill(), count() and countTH() all use it
// as their loop bound, and main() uses it as the allocation length.
// NOTE(review): being a lowercase macro, this textually replaces every later
// `size` token in the file, so no variable, member or parameter may use that name.
#define size 1000000

// Tricks the cache
//
// Per-thread tally slot. Only `total` is ever read or written in this file;
// `fluff` is dead padding that puts at least 64 bytes between the `total`
// fields of neighbouring array elements.
// NOTE(review): presumably this keeps each thread's counter on its own cache
// line to avoid false sharing; that assumes 64-byte lines and is not enforced
// by any alignment specifier here -- confirm for the target machine.
struct trick
{
	// Number of 3's this thread has found in its partition
	int total;
	// Padding only; never accessed
	char fluff[64];
};

// Global memory

// One padded tally slot per worker thread, indexed by the thread's id.
// The element count is also the maximum number of worker threads.
trick counters[16];

// Number of worker threads in the current run (0 during the sequential pass);
// written by main(), read by countTH() to size its partition.
int number;

// The data being scanned; allocated in main() with `size` elements.
int *array;

// Grand total of 3's for the current iteration. count() increments it
// directly; each countTH() adds its private tally while holding `lock`.
int overall;

// Protects `overall` against concurrent updates from the worker threads.
// Statically initialised so it is valid before the first
// pthread_mutex_lock(); nothing in this file calls pthread_mutex_init().
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

// fills the array with random values [0, 10)
//
// Overwrites all `size` elements of the global `array` with rand()%10.
// The generator is seeded from the wall clock on the first call only:
// re-seeding on every call would restart the same sequence for every call
// made within one clock second, refilling the array with identical data.
// Not thread-safe (rand() and the seed flag are unsynchronised); it is only
// called from main() while no worker threads are running.
void fill ( )
{
	static bool seeded = false;

	if (!seeded)
	{
		srand (static_cast <unsigned int> (time (NULL)));
		seeded = true;
	}

	for (int i=0; i<size; i++)
	{
		array[i] = rand()%10;
	}
}

// Threaded Count Function
//
// pthread entry point. Counts the 3's in one contiguous partition of the
// global `array` and adds the result to `overall`.
//
// args:    the worker's index (0 .. number-1) packed into the pointer value
//          itself; it is never dereferenced.
// returns: always NULL (main() ignores the join value).
//
// Preconditions: `number` >= 1 (it is used as a divisor), the index is a
// valid subscript of counters[], and counters[index].total has already been
// reset to zero by the caller.
void *countTH (void* args)
{
	// Get your ID number
	const size_t trueid = reinterpret_cast <size_t> (args);

	// Get your partition: ceil(size / number) elements per thread, computed
	// in integer arithmetic so the result is exact for any array length
	const int l_per_thread = (size + number - 1) / number;

	const int start = l_per_thread * static_cast <int> (trueid);
	int end = start + l_per_thread;

	// The last partition may be short (or empty) because of the rounding up
	if (end > size)
	{
		end = size;
	}

	// Count your 3's into this thread's own slot; no lock is needed because
	// no other thread touches counters[trueid]
	for (int i=start; i<end; i++)
	{
		if (array[i] == 3)
		{
			counters[trueid].total++;
		}
	}

	// Add them into the final count; `overall` is shared, so serialise
	pthread_mutex_lock (&lock);
	overall += counters[trueid].total;
	pthread_mutex_unlock (&lock);

	// A non-void function must return a value: flowing off the end is
	// undefined behaviour in C++
	return NULL;
}

// Sequentially count
//
// Single-threaded reference: walks all `size` elements of the global
// `array` and bumps the global `overall` once for every element equal to 3.
// The caller is responsible for zeroing `overall` beforehand.
void count ( )
{
	const int *const stop = array + size;

	for (const int *cursor = array; cursor != stop; ++cursor)
	{
		if (*cursor != 3)
		{
			continue;
		}

		++overall;
	}
}

int main (int argc, char **argv)
{
	int i, j, k;

	pthread_t workers[16];
	timer watch;
    	double time;

	// Storage for the values
    	array = new int[size];

	// Time is not used up

    	time = 0.0f;

    	number = i = 0;

	// Do 10000 iterations of sequential counting
    	for (j=0; j<10000; j++)
    	{

        	fill ();

        	overall = 0;

		// Begin timing

		watch.start ();

        	count ();

		// End timing
		watch.stop ();

		// Keep only the 9800 iterations in the center
        	if (j > 100 && j < 9900)
        	{
            		time += watch.read ();
        	}
    	}

	// Spew the outputs
	printf ("%i\t%.3f\n", number, time/9800.0f);

	// 16 different threaded iterations
	for (i=1; i<17; i++)
	{
		// Reset everything
		number = i;
        	time = 0.0f;

		// Perform 10000 iterations
		for (j=0; j<10000; j++)
		{
			// Re-initialization
            	fill ();
            		overall = 0;

	       		for (k=0; k<number; k++)
	       		{
	       			counters[k].total = 0;
	       		}

			// Start timing
    	    		watch.start ();

			// Run the threads out
	        	for (k=0; k<number; k++)
	        	{
	   			pthread_create (&workers[k], NULL, countTH, (void *)k);
            		}

	        	for (k=0; k<number; k++)
	        	{
		        	pthread_join (workers[k], NULL);
	        	}
   
			// Finish timing once all threads are done
	        	watch.stop ();

			// Keep only the 9800 in the center
			if (j > 100 && j < 9900)
			{
        			time += watch.read ();
			}
        	}
    	
		printf ("%i\t%.3f\n", number, time/9800.0f);
    	}

	// Cleanup
	delete [] array;
}