/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* queue.c
 * This file implements the broker queue, as proposed by
 * Schmalstieg et al.
 */
 
#include <stdio.h>
#include <time.h>
#include <stdbool.h>
#include "globals.h"
#include "barrier.h"
#include "locks.h"
#include "queue.h"

/* Busy-wait for a number of clock() ticks and return that number.
 *
 * The wait length lives in a function-local static, so it persists across
 * calls and is doubled on each call. The doubling is capped at
 * BACKOFF_MAX_CYCLES: without a cap the signed counter overflowed after
 * roughly 18 calls (undefined behaviour), and the calls before that already
 * waited for up to 2^30 ticks.
 *
 * NOTE(review): the static is presumably shared by all device threads and is
 * updated without atomics. The limit is therefore computed on a private copy
 * so a racing update can never push it past the cap; a lost doubling only
 * changes how long a caller waits.
 */
__DEVQLFR
static int backoff()
{
	enum { BACKOFF_MAX_CYCLES = 1 << 20 };  /* tunable upper bound */
	static int max_cycles = 8192;
	clock_t start = clock();
	clock_t now, cycles;
	int limit = max_cycles;

	/* Values are powers of two, so doubling below the cap never exceeds it */
	if (limit < BACKOFF_MAX_CYCLES) {
		limit *= 2;
		max_cycles = limit;
	}

	for (;;) {
		now = clock();
		/* now == start means zero elapsed ticks, not a wrap-around.
		 * The wrap branch assumes a 32-bit tick counter -- TODO confirm.
		 */
		cycles = now >= start
		       ? now - start
		       : now + (0xffffffff - start);
		if (cycles >= limit)
			return limit;
	}
}


/* Spin until ticket slot `pos` of `queue` holds the value `expected`,
 * calling backoff() between polls.
 *
 * The slot is polled through a volatile pointer: the value is changed by
 * other threads, and a plain load inside the loop may legally be hoisted
 * out of it (or served from a stale cached copy), which would make the
 * loop spin forever.
 */
__DEVQLFR
static void wait_for_ticket(bq_t *queue, int pos, unsigned int expected)
{
	volatile unsigned int *slot = &queue->tickets[pos];

	while (*slot != expected)
		backoff();
}


/* Store a private copy of `item` (`size` bytes) in the next tail slot.
 *
 * A position is claimed with atomicAdd on queue->tail; the slot index is
 * that position modulo max_size. The caller then waits until the slot's
 * ticket equals 2*(pos/max_size), fills the slot, and publishes ticket
 * 2*(pos/max_size)+1, which is the value read_data() waits for.
 *
 * NOTE(review): the malloc() result is not checked; this function returns
 * void and has no way to report failure, so a failed allocation still
 * faults in memcpy(). Whoever consumes the item presumably frees `data` --
 * verify against the callers.
 */
__DEVQLFR
static void put_data(bq_t *queue, void *item, unsigned int size)
{
	unsigned int pos = atomicAdd(&(queue->tail), 1);
	unsigned int P = pos % queue->max_size;
	unsigned int round = 2 * (pos / queue->max_size);

	wait_for_ticket(queue, P, round);
	queue->buffer[P].data = malloc(size);
	memcpy(queue->buffer[P].data, item, size);

	/* Atomic operations are not memory fences: without this the ticket
	 * update could become visible to a consumer before the payload and
	 * the data pointer written above.
	 */
	__threadfence();
	atomicExch(&(queue->tickets[P]), round + 1);
}


/* Take the item at the next head position.
 *
 * A position is claimed with atomicAdd on queue->head and mapped to a ring
 * slot (position modulo max_size). After the slot's ticket reaches
 * 2*(pos/max_size)+1 the slot address is recorded, the ticket is advanced
 * to 2*((pos+max_size)/max_size), and the slot address is returned.
 *
 * NOTE(review): the returned pointer refers to the ring slot itself, and
 * the ticket has already been advanced when this function returns, so a
 * later put_data() on the same slot may overwrite it -- confirm that
 * callers copy what they need promptly.
 */
__DEVQLFR
static bq_item_t *read_data(bq_t *queue)
{
	unsigned int seq = atomicAdd(&(queue->head), 1);
	unsigned int slot = seq % queue->max_size;
	bq_item_t *item;

	wait_for_ticket(queue, slot, 2 * (seq / queue->max_size) + 1);

	item = queue->buffer + slot;
	atomicExch(&(queue->tickets[slot]),
	           2 * ((seq + queue->max_size) / queue->max_size));

	return item;
}


/* Try to reserve room for one more element.
 *
 * Returns true when queue->count was successfully incremented while still
 * below max_size; returns false when the last observed count is already at
 * or above max_size. A failed attempt is rolled back with atomicSub.
 *
 * All comparisons are done on signed values. The reserve/roll-back scheme
 * (here and in ensure_dequeue) can drive the counter below zero for a
 * moment; evaluated as unsigned, such a value looks like a huge count and
 * the queue is wrongly reported as full. The casts make the result
 * independent of how the fields are declared; max_size is assumed to fit
 * in an int.
 */
__DEVQLFR
static bool ensure_enqueue(bq_t *queue)
{
	int limit = (int) queue->max_size;
	int n = (int) queue->count;

	for (;;) {
		if (n >= limit)
			return false;
		/* atomicAdd returns the value before the increment */
		if ((int) atomicAdd(&queue->count, 1) < limit)
			return true;
		/* Over-subscribed: undo the increment and re-evaluate */
		n = (int) atomicSub(&queue->count, 1) - 1;
	}
}


/* Try to reserve one element for removal.
 *
 * Returns true when queue->count was successfully decremented from a
 * positive value; returns false when the last observed count is zero or
 * negative. A failed attempt is rolled back with atomicAdd.
 *
 * All comparisons are done on signed values. With an unsigned view,
 * "n <= 0" only matches exactly 0, and a counter that has been decremented
 * past zero compares as a huge positive number, so the "empty" state is
 * missed. The casts make the result independent of how the field is
 * declared.
 */
__DEVQLFR
static bool ensure_dequeue(bq_t *queue)
{
	int n = (int) queue->count;

	for (;;) {
		if (n <= 0)
			return false;
		/* atomicSub returns the value before the decrement */
		if ((int) atomicSub(&queue->count, 1) > 0)
			return true;
		/* Nothing was there: undo the decrement and re-evaluate */
		n = (int) atomicAdd(&queue->count, 1) + 1;
	}
}


/* Append a copy of `node` (`size` bytes) to `queue`.
 *
 * Keeps retrying ensure_enqueue(). After each failed attempt head and tail
 * are sampled; the call gives up and returns false when tail - head lies
 * in [max_size, max_size + max_threads/2). Once a place has been reserved
 * the element is stored with put_data() and true is returned.
 */
__DEVQLFR
bool bq_enqueue(bq_t *queue, void *node, unsigned int size)
{
	int upper = queue->max_size + queue->max_threads / 2;

	for (;;) {
		unsigned int head, tail, used;

		if (ensure_enqueue(queue))
			break;

		head = queue->head;
		tail = queue->tail;
		used = tail - head;   /* unsigned distance between the cursors */

		if (used < queue->max_size)
			continue;
		if (used < upper)
			return false;
	}

	put_data(queue, node, size);
	return true;
}


/* Remove the next element from `queue`.
 *
 * Keeps retrying ensure_dequeue(). After each failed attempt head and tail
 * are sampled and NULL is returned when the unsigned value tail - head - 1
 * is at least max_size + max_threads/2; this includes head == tail, where
 * the subtraction wraps around to the largest unsigned value. Once an
 * element has been reserved, the result of read_data() is returned.
 */
__DEVQLFR
bq_item_t *bq_dequeue(bq_t *queue)
{
	int threshold = queue->max_size + queue->max_threads / 2;

	while (!ensure_dequeue(queue))
	{
		unsigned int head = queue->head;
		unsigned int tail = queue->tail;
		unsigned int gap = tail - head - 1;

		if (gap >= threshold)
			return NULL;
	}

	return read_data(queue);
}


/* Should be executed by 1 CUDA thread.
 *
 * Allocates the item buffer and the ticket array for `max_size` slots,
 * zeroes every ticket and resets the counters. Does nothing when `queue`
 * is NULL.
 *
 * If either allocation fails, nothing is kept: both pointers are set to
 * NULL and all sizes/counters to 0, so the failure can be detected by
 * testing queue->buffer (the function itself returns void).
 */
__DEVQLFR
void bq_init(bq_t *queue, unsigned int max_size, unsigned int max_threads)
{
	unsigned int i;   /* unsigned to match the max_size bound */
	bq_item_t *buffer;
	unsigned int *tickets;

	if (!queue) return;

	buffer = (bq_item_t *) malloc(max_size * sizeof(bq_item_t));
	tickets = (unsigned int *) malloc(max_size * sizeof(unsigned int));

	if (!buffer || !tickets)
	{
		/* free(NULL) is a no-op, so no need to tell which one failed */
		free(buffer);
		free(tickets);
		queue->buffer = NULL;
		queue->tickets = NULL;
		queue->max_size = 0;
		queue->count = 0;
		queue->head = queue->tail = 0;
		queue->max_threads = 0;
		return;
	}

	for (i = 0; i < max_size; i++)
		tickets[i] = 0;

	queue->buffer = buffer;
	queue->tickets = tickets;
	queue->max_size = max_size;
	queue->count = 0;
	queue->head = queue->tail = 0;
	queue->max_threads = max_threads;
}


/* Should be executed by 1 CUDA thread.
 *
 * Releases the item buffer and the ticket array and resets every field.
 * Does nothing when `queue` is NULL. The pointers are cleared after being
 * freed, so calling this twice on the same queue is harmless instead of a
 * double free. The ticket array is not zeroed first: it is released right
 * away, and touching it would fault if it had never been allocated.
 *
 * NOTE(review): the per-item `data` blocks allocated in put_data() are not
 * released here -- presumably the consumer of each item owns them; verify.
 */
__DEVQLFR
void bq_destroy(bq_t *queue)
{
	if (!queue) return;

	free(queue->buffer);
	free(queue->tickets);
	queue->buffer = NULL;
	queue->tickets = NULL;

	queue->max_size = 0;
	queue->count = 0;
	queue->head = queue->tail = 0;
	queue->max_threads = 0;
}