/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* ort_barrier.c */

/*
 * 2010/12/18:
 *   First time around.
 */

#include "ort_prive.h"

/* Values returned by the barrier wait functions in this file */
#define CANCELLED_NONE      0    /* barrier completed, no cancellation seen */
#define CANCELLED_PARALLEL  1    /* check_cancel_parallel() reported true */
#define CANCELLED_TASKGROUP 2    /* check_cancel_taskgroup() reported true */

/* Values stored in bar->state[phase] */
#define HELPING   0   /* spinning threads may look at the team's task flag */
#define RELEASING 1   /* set by the master just before a task-less release */

/* Used to index aligned_3int.value[] */
enum {ARRIVED_INDEX, RELEASED_INDEX, PHASE_INDEX};

/* Per-thread barrier flags; all three expect a variable named `bar`
 * (the barrier being operated on) to be in scope at the point of use.
 */
#define ARRIVED(eeid)   bar->status[eeid].value[ARRIVED_INDEX]
#define RELEASED(eeid)  bar->status[eeid].value[RELEASED_INDEX]
#define PHASE(eeid)     bar->status[eeid].value[PHASE_INDEX]

/* Threshold beyond which the barrier array is reduced: an allocation of at
 * least this many entries is re-allocated by default_barrier_init() when
 * the new team needs no more than half of it.
 */
#define ALLOC_THRESHOLD 16

#if ORT_DEBUG & DBG_BARRIERS
/* Counters maintained only in barrier-debugging builds:
 *   _defbars  - default_barrier_wait() calls made with eeid 0
 *   _taskbars - barriers where the master went through the tasking path
 *   _parbars  - default_barrier_wait_in_parallel() calls made with eeid 0
 */
int _taskbars;
int _defbars;
int _parbars;

/* Dump the current barrier counters to stderr. */
void _show_bar_stats()
{
	int taskless_defbars = _defbars - _taskbars;

	fprintf(stderr, "\t[barrier stats] %d defbars (%d without tasks), %d parbars\n",
	        _defbars, taskless_defbars, _parbars);
}
#endif


/* Prepare a barrier for a team of team_size threads.
 *
 * barp      - address of the barrier pointer; if it holds NULL a new barrier
 *             object is allocated and stored there.
 * team_size - number of threads that will synchronize on the barrier.
 *
 * The per-thread status array is (re)allocated when needed and then every
 * thread's ARRIVED/RELEASED/PHASE flag is cleared and both phase states are
 * set to HELPING.
 *
 * It is assumed that exactly one thread calls this (so as to avoid
 * expensive bookkeeping).
 */
void default_barrier_init(ort_defbar_t **barp, int team_size)
{
	ort_defbar_t *bar = *barp;
	int id;

#if !defined(EE_TYPE_PROCESS)
	int too_small, too_big;

	if (bar == NULL)
	{
		bar = (ort_defbar_t *) ort_calloc(sizeof(ort_defbar_t));
		*barp = bar;
	}

	/* Re-allocate when the array cannot hold the team, or when a large array
	 * (at least ALLOC_THRESHOLD entries) would be at most half used.
	 */
	too_small = (team_size > bar->alloc_size);
	too_big   = (bar->alloc_size >= ALLOC_THRESHOLD) &&
	            (team_size <= bar->alloc_size / 2);
	if (too_small || too_big)
	{
		bar->status = (volatile aligned_3int *)
			ort_realloc_aligned(team_size * sizeof(aligned_3int),
			                    (void **) &bar->actual_arr_ptr);
		bar->alloc_size = team_size;
	}

#else
	int mid;

	if (bar == NULL)
	{
		ee_shmalloc((void **) barp, sizeof(ort_defbar_t), &mid);
		bar = *barp;
		bar->alloc_size = 0;
	}
	if (bar->alloc_size != team_size)
	{
		/* If the barrier already exists, this is a memory leak */
		ee_shmalloc((void **) &(bar->status), team_size*sizeof(aligned_3int), &mid);
		bar->alloc_size = team_size;
	}

#endif

	/* Start from a clean state */
	bar->state[0] = HELPING;
	bar->state[1] = HELPING;
	bar->team_size = team_size;
	for (id = 0; id < team_size; id++)
	{
		ARRIVED(id) = 0;
		RELEASED(id) = 0;
		PHASE(id) = 0;
	}
}


/* Release a barrier and reset the caller's pointer to NULL.
 * A NULL barp, or a barp that holds NULL, is silently ignored.
 * Currently unused.
 *
 * NOTE(review): under EE_TYPE_PROCESS, default_barrier_init() obtains its
 * memory through ee_shmalloc(); whether free() is the right way to release
 * that memory should be confirmed before this function is put to use.
 */
void default_barrier_destroy(ort_defbar_t **barp)
{
	ort_defbar_t *bar;

	if (barp == NULL)
		return;

	bar = *barp;
	if (bar == NULL)
		return;

	free(bar->actual_arr_ptr);
	free(bar);
	*barp = NULL;
}


/* Leave the enclosing function with CANCELLED_PARALLEL when
 * check_cancel_parallel(me) reports true. Expects a variable named `me` to
 * be in scope. Wrapped in do/while(0) so that it acts as one statement and
 * cannot capture an `else` that follows it.
 */
#define return_if_parallel_cancelled \
   do { if (check_cancel_parallel(me)) return CANCELLED_PARALLEL; } while (0)


/* Barrier used when cancellation is enabled; a barrier is a cancellation
 * point, so this function checks for active cancellation.
 *
 * bar  - the team's barrier.
 * eeid - the caller's id; id 0 plays the master role, all others wait to be
 *        released by it.
 *
 * Returns CANCELLED_PARALLEL, CANCELLED_TASKGROUP or CANCELLED_NONE.
 *
 * A thread spinning in the barrier only checks for cancel parallel, while at
 * the end of the barrier it also checks for cancel taskgroup. This is
 * because we must allow threads to exit the barrier early when parallel is
 * cancelled, even if not all siblings have entered the barrier function.
 *
 * In this barrier, we cannot skip directly to the 2nd synchronization
 * point (if tasks exist) as it may lead to deadlocks.
 *
 * When AVOID_OMPI_DEFAULT_TASKS is defined the body is compiled out; the
 * function then performs no synchronization and reports CANCELLED_NONE, so
 * that callers never read an indeterminate return value.
 */
static int default_barrier_wait_with_cancel(ort_defbar_t *bar, int eeid)
{
#if !defined(AVOID_OMPI_DEFAULT_TASKS)
	ort_eecb_t *me = __MYCB;
	int time = 0, check_for_tasks = 1;
	volatile int *tasks_exist = &(TEAMINFO(me)->at_least_one_task);

	/* Flip my phase bit; it selects which bar->state[] entry this episode uses */
	int phase = (PHASE(eeid) ^= 1);

	if (eeid > 0)
	{
		/* declare my arrival and loop till the master releases me */
		for (ARRIVED(eeid) = 1; ARRIVED(eeid) == 1; )
		{
			if (bar->state[phase] == HELPING)   /* check for correct state */
			{
				if (check_for_tasks && *tasks_exist)  /* if there are tasks, help */
				{
					finish_all_team_tasks(me);  /* participate; all tasks done */
					check_for_tasks = 0;            /* must participate exactly once */
				}
				return_if_parallel_cancelled;
			}
			YIELD_WHEN_SPINNNG(time, BAR_YIELD);
		}

		if (bar->state[phase] == HELPING) /* check for correct state */
		{
			if (*tasks_exist)                     /* maybe I missed participating */
			{
				if (check_for_tasks)
					finish_all_team_tasks(me);    /* participate */
				/* check *after* tasking consensus since that, too, aborts on cancel */
				return_if_parallel_cancelled;

				/* 2nd synchronization point, to make sure everybody left the consensus
				 * procedure, before the master resets the tasks_exist flag to 0.
				 */
				RELEASED(eeid) = 1;
				SPIN_WHILE(RELEASED(eeid) == 1, BAR_YIELD);
			}
		}
	}
	else /* master */
	{
		for (eeid = 1; eeid < bar->team_size; eeid++)          /* for all threads */
			for (; ARRIVED(eeid) != 1; )        /* wait for the arrival of each one */
			{
				if (check_for_tasks && *tasks_exist) /* similar to the other threads */
				{
					finish_all_team_tasks(me);  /* participate; all tasks done */
					check_for_tasks = 0;            /* must participate exactly once */
				}
				if (check_cancel_parallel(me))
					goto BAR_CANCEL_PARALLEL;
				YIELD_WHEN_SPINNNG(time, BAR_YIELD);
			}

		if (check_cancel_parallel(me))
		{
			BAR_CANCEL_PARALLEL:
				if (bar->state[1-phase] == RELEASING)  /* see below */
					bar->state[1-phase] = HELPING;
				TEAMINFO(me)->cancel_sec_active = 0;
				TEAMINFO(me)->cancel_for_active = 0;
				SFENCE;
				return CANCELLED_PARALLEL;
		}

		/* If required, reset state of next barrier */
		if (bar->state[1-phase] == RELEASING)
			bar->state[1-phase] = HELPING;
		NON_TSO_FENCE;

		if (*tasks_exist)                     /* I missed participating */
		{
			if (check_for_tasks)
				finish_all_team_tasks(me);    /* participate */

			/* release threads from the 1st synchronization point */
			for (eeid = 1; eeid < bar->team_size; eeid++)
				ARRIVED(eeid) = 0;

			/* wait till all threads reach the second synchronization point */
			for (eeid = 1; eeid < bar->team_size; eeid++)
				SPIN_WHILE(RELEASED(eeid) != 1, BAR_YIELD);

			/* prepare for next use */
			TEAMINFO(me)->cancel_sec_active = 0;
			TEAMINFO(me)->cancel_for_active = 0;

			/* safely reset status and tasking flag */
			TEAMINFO(me)->nonidle = me->num_siblings;
			*tasks_exist = 0;
			SFENCE;

			/* release threads from the 2nd synchronization point */
			for (eeid = 1; eeid < bar->team_size; eeid++)
				RELEASED(eeid) = 0;
#if ORT_DEBUG & DBG_BARRIERS
			_taskbars++;
#endif
		}
		else                       /* no tasks ever created; only 1 sync point */
		{
			/* prepare for next use */
			TEAMINFO(me)->cancel_sec_active = 0;
			TEAMINFO(me)->cancel_for_active = 0;
			SFENCE;

			/* release threads (1st synchronization point) */
			bar->state[phase] = RELEASING;                    /* releasing state */
			SFENCE;
			for (eeid = 1; eeid < bar->team_size; eeid++)
				ARRIVED(eeid) = 0;
		} /* no tasks */
#if ORT_DEBUG & DBG_BARRIERS
		_show_bar_stats();
#endif
	} /* master */

	me->tasking.status = TBUSY;  /* everybody becomes busy again */

	/* The check for cancel taskgroup must be done here because all the barrier
	 * functionality must be present to ensure correct thread synchronization.
	 */
	return_if_parallel_cancelled;
	if (check_cancel_taskgroup(me))
		return CANCELLED_TASKGROUP;
	return CANCELLED_NONE;
#else
	/* Body compiled out: still hand a defined value back to the caller */
	(void) bar;
	(void) eeid;
	return CANCELLED_NONE;
#endif
}


/* Optimized default_barrier_wait for the case cancellation is disabled.
 *
 * bar  - the team's barrier.
 * eeid - the caller's id; id 0 plays the master role, all others wait to be
 *        released by it.
 *
 * Always returns CANCELLED_NONE.
 *
 * Unlike the cancellation-aware variant, a thread that helped with the
 * team's tasks while spinning jumps straight to the 2nd synchronization
 * point.
 *
 * When AVOID_OMPI_DEFAULT_TASKS is defined the body is compiled out; the
 * function then performs no synchronization and reports CANCELLED_NONE, so
 * that callers never read an indeterminate return value.
 */
static int default_barrier_wait_without_cancel(ort_defbar_t *bar, int eeid)
{
#if !defined(AVOID_OMPI_DEFAULT_TASKS)
	ort_eecb_t *me = __MYCB;
	int time = 0;
	volatile int *tasks_exist = &(TEAMINFO(me)->at_least_one_task);

	/* Flip my phase bit; it selects which bar->state[] entry this episode uses */
	int phase = (PHASE(eeid) ^= 1);

	if (eeid > 0)
	{
		/* declare my arrival and loop till master releases me (1st sync point) */
		for (ARRIVED(eeid) = 1; ARRIVED(eeid) == 1; )
		{
			if (bar->state[phase] == HELPING && *tasks_exist)
			{ /* if there are tasks, help */
				finish_all_team_tasks(me);  /* participate; all tasks done */
				goto TASKSDONE_THREAD;          /* skip directly to 2nd sync */
			}
			YIELD_WHEN_SPINNNG(time, BAR_YIELD);
		}

		/* I missed participating */
		if (bar->state[phase] == HELPING && *tasks_exist)
		{
			finish_all_team_tasks(me);      /* participate */

			/* Make sure everybody left the consensus procedure, before the master
			 * resets the tasks_exist flag to 0 (2nd sync point)
			 */
			TASKSDONE_THREAD:
			RELEASED(eeid) = 1;
			SPIN_WHILE(RELEASED(eeid) == 1, BAR_YIELD);
		}
	}
	else /* master */
	{
		for (eeid = 1; eeid < bar->team_size; eeid++)          /* for all threads */
			for (; ARRIVED(eeid) != 1; )        /* wait for the arrival of each one */
			{
				if (*tasks_exist)                 /* similar to the other threads */
				{
					finish_all_team_tasks(me);  /* participate; all tasks done */
					goto TASKSDONE_MASTER;          /* skip directly to 2nd sync */
				}
				YIELD_WHEN_SPINNNG(time, BAR_YIELD);
			}

		NON_TSO_FENCE;
		if (*tasks_exist)                     /* I missed participating */
		{
			finish_all_team_tasks(me);      /* participate */

			TASKSDONE_MASTER:

			/* if required, reset state of next barrier */
			if (bar->state[1-phase] == RELEASING)
				bar->state[1-phase] = HELPING;

			/* Wait for all threads in the 2nd synchronization point */
			for (eeid = 1; eeid < bar->team_size; eeid++)
			{
				ARRIVED(eeid) = 0;     /* Reset the flags of 1st sync point */
				SPIN_WHILE(RELEASED(eeid) != 1, BAR_YIELD);
			}

			/* Safely reset status and tasking flag */
			TEAMINFO(me)->nonidle = me->num_siblings;
			*tasks_exist = 0;
			SFENCE;

			/* Release threads from the 2nd synchronization point */
			/* We do not need to mark that we are in a RELEASING state and block
			 * fast threads from helping in the next barrier.
			 */
			for (eeid = 1; eeid < bar->team_size; eeid++)
				RELEASED(eeid) = 0;

#if ORT_DEBUG & DBG_BARRIERS
			_taskbars++;
#endif
		}
		else                       /* no tasks ever created; only 1 sync point */
		{
			/* There are no tasks; make sure nobody checks for them (so that if a
			 * released thread (below) is fast and produces tasks, the ones still
			 * spinning on this barrier do not see that tasks_exist became true).
			 */
			bar->state[phase] = RELEASING;
			/* if required, reset state of next barrier */
			if (bar->state[1-phase] == RELEASING)
				bar->state[1-phase] = HELPING;
			SFENCE;
			/* Release threads (which currently spin at 1st sync point) */
			for (eeid = 1; eeid < bar->team_size; eeid++)
				ARRIVED(eeid) = 0;
		} /* no tasks */
#if ORT_DEBUG & DBG_BARRIERS
		_show_bar_stats();
#endif
	} /* master */

	me->tasking.status = TBUSY;  /* everybody becomes busy again */
	return CANCELLED_NONE;
#else
	/* Body compiled out: still hand a defined value back to the caller */
	(void) bar;
	(void) eeid;
	return CANCELLED_NONE;
#endif
}


/* Barrier entry point: dispatches to the cancellation-aware or the
 * cancellation-free implementation, depending on CANCEL_ENABLED(), and
 * hands back whatever that implementation returns.
 */
int default_barrier_wait(ort_defbar_t *bar, int eeid)
{
#if ORT_DEBUG & DBG_BARRIERS
	/* Count each barrier once, through the thread with id 0 */
	if (eeid == 0)
		_defbars++;
#endif

	return (CANCEL_ENABLED())
	       ? default_barrier_wait_with_cancel(bar, eeid)
	       : default_barrier_wait_without_cancel(bar, eeid);
}


/* Make the calling thread wait on its team's barrier.
 * Returns 0 immediately when the thread has no siblings (num_siblings is 1);
 * otherwise returns the result of ee_barrier_wait().
 */
int _ort_barrier_me(void)
{
	ort_eecb_t *me = __MYCB;

	if (me->num_siblings != 1)
		return ee_barrier_wait(TEAMINFO(me)->barrier, me->thread_num);

	return 0;
}


/* Barrier executed at the end of a parallel region.
 *
 * The compiler injects an _ort_taskwait(2) call there ("2" signifying the
 * end of a parallel region, i.e. not a plain taskwait but an
 * end-of-parallel barrier), and _ort_taskwait() in turn calls this
 * function. The barrier's cancellation result is not reported back.
 */
void default_barrier_wait_in_parallel(ort_defbar_t *bar, int eeid)
{
#if ORT_DEBUG & DBG_BARRIERS
	/* Count each barrier once, through the thread with id 0 */
	if (eeid == 0)
		_parbars++;
#endif

	if (!(CANCEL_ENABLED()))
	{
		(void) default_barrier_wait_without_cancel(bar, eeid);
		return;
	}
	(void) default_barrier_wait_with_cancel(bar, eeid);
}


/* Drop the barrier state values and the per-thread flag accessors that were
 * defined near the top of this file.
 */
#undef HELPING
#undef RELEASING
#undef ARRIVED
#undef RELEASED
#undef PHASE
