/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* thrpriv.c -- OMPi RunTime library; threadprivate vars and related stuff */

#include <stdlib.h>
#include <string.h>
#include "ort_prive.h"


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                                   *
 * THREADPRIVATE VARIABLES                                           *
 *                                                                   *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Returns the address of the threadprivate copy, belonging to execution
 * entity e, of the variable identified by *varid (see the internals
 * documentation for the details of the implementation).
 *
 *   e:       the control block of the execution entity asking for its copy
 *   varid:   where the id of the variable lives; 0 means that no id has
 *            been assigned yet, in which case one is generated here
 *   size:    the size of the variable in bytes
 *   origvar: the original variable; fresh copies are initialized from it
 */
static void *get_ee_thrpriv(ort_eecb_t *e, int *varid, int size, void *origvar)
{
	ort_eecb_t *holder;
	void       **slots;
	int        id, me, have, want;

	if (*varid == 0)    /* First use ever of this variable; give it an id */
	{
		/* ort->preparation_lock is borrowed so as not to define yet another
		 * lock; the id is checked again once the lock is held, since it may
		 * have been assigned by somebody else in the meantime. */
		ee_set_lock((ee_lock_t *) &ort->preparation_lock);
		if (*varid == 0)
			*varid = ++(ort->thrpriv_num);
		SFENCE;
		ee_unset_lock((ee_lock_t *) &ort->preparation_lock);
	}
	id = *varid;

	/* The table of copies is reached through the parent and is indexed by
	 * thread number; for the initial thread (level 0) the table is reached
	 * through its own control block (its 0-th child space). */
	holder = e;
	if (e->level > 0)
		holder = e->parent;
	me    = e->thread_num;
	have  = holder->mf->tpkeys[me].alloted;
	slots = holder->mf->tpkeys[me].vars;

	if (id >= have)     /* Table too short for this id; make it longer */
	{
		want = id + 10;
		if (slots == NULL)
			slots = ort_alloc(want * SIZEOF_CHAR_P);
		else
			slots = ort_realloc(slots, want * SIZEOF_CHAR_P);
		if (slots == NULL)
			ort_error(1, "[_ort_get_thrpriv]: memory allocation failed\n");
		/* New entries are zeroed so that they read as "no copy yet" */
		memset(&slots[have], 0, (want - have)*SIZEOF_CHAR_P);
		holder->mf->tpkeys[me].alloted = want;
		holder->mf->tpkeys[me].vars = slots;
	}

	if (slots[id] != NULL)   /* The copy exists already */
		return (slots[id]);

	if (me == 0 && e->level > 0)
	{
		/* master thread; it uses whatever copy its parent has */
		slots[id] = get_ee_thrpriv(e->parent, varid, size, origvar);
		return (slots[id]);
	}

	/* Everybody else (the initial thread included) gets a brand new copy,
	 * initialized with the contents of origvar. */
	slots[id] = ort_alloc(size);
	if (slots[id] == NULL)
	{
		if (me == 0)
			ort_error(1, "[_ort_get_thrpriv]: out of initial thread memory\n");
		else
			ort_error(1, "[_ort_get_thrpriv]: out of memory\n");
	}
	memcpy(slots[id], origvar, size);
	return (slots[id]);
}


/* The interface: hands back the calling thread's copy of a threadprivate
 * variable. The key is treated as the location of the variable's integer
 * id; size and origvar are forwarded untouched.
 */
void *_ort_get_thrpriv(void **key, int size, void *origvar)
{
	int  *varid = (int *) key;
	void *mine;

	mine = get_ee_thrpriv(__MYCB, varid, size, origvar);
	return (mine);
}


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                                   *
 * COPYPRIVATE SUPPORT                                               *
 *                                                                   *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* The SINGLE owner initialization for copyprivate data.
 * It creates an array of pointers to its private data and publishes it,
 * along with the owner's thread number and the number of expected copiers,
 * through the team's copyprivate descriptor.
 *
 *   num: the number of pointers that follow
 *   ...: num pointers (each one is read as a void *)
 *
 * Nothing is done if the caller has no siblings. The array allocated here
 * is freed in _ort_copy_private(), when the count of copiers drops to 0.
 */
void _ort_broadcast_private(int num, ...)
{
#ifdef EE_CLUSTERIZED
	extern int ncurr_work_threads;
#endif
	va_list     ap;
	ort_eecb_t  *me;
	ort_cpriv_t *cp;
	int         i;

	if ((me = __MYCB)->num_siblings == 1)  /* Nothing here if I am solo */
		return;

	cp = &(TEAMINFO(me)->copyprivate);
	cp->owner   = me->thread_num;
	cp->copiers = me->num_siblings;
#ifdef EE_CLUSTERIZED
	/* In the clusterized case the count is ncurr_work_threads instead */
	cp->copiers = ncurr_work_threads;
	cp->owner_node = ee_pid();
#endif
	cp->data    = (volatile void **) malloc(num * SIZEOF_CHAR_P);
	/* A failed allocation must not reach the loop below (it would write
	 * through a NULL pointer). malloc(0) is allowed to return NULL, so we
	 * only complain when there actually are pointers to store. */
	if (cp->data == NULL && num > 0)
		ort_error(1, "[_ort_broadcast_private]: memory allocation failed\n");

	va_start(ap, num);
	for (i = 0; i < num; i++)
		cp->data[i] = va_arg(ap, void *);
	va_end(ap);
}


#if !defined(EE_CLUSTERIZED)

/* All threads copy copyprivate date from the SINGLE owner.
 * The arguments are pointer-size pairs.
 */
void _ort_copy_private(int num, ...)
{
	va_list     ap;
	int         i;
	void        **from, *arg;
	ort_cpriv_t *cp;
	ort_eecb_t  *me;

	if ((me = __MYCB)->num_siblings == 1)  /* Nothing here if I am solo */
		return;

	cp = &(TEAMINFO(me)->copyprivate);
	if (cp->owner != me->thread_num)   /* I am not the owner */
	{
		va_start(ap, num);
		from = (void **) cp->data;
		for (i = 0; i < num; i++)
		{
			arg = va_arg(ap, void *);
			memcpy(arg, from[i], va_arg(ap, int));
		}
		va_end(ap);
	}

#if defined(HAVE_ATOMIC_FAA) && !defined(EE_TYPE_PROCESS)
	i = _faa(&(cp->copiers), -1) - 1;
#else
	ee_set_lock(&cp->lock);
	i = --cp->copiers;
	ee_unset_lock(&cp->lock);
#endif

	if (i == 0)
		free(cp->data);                    /* Free allocated data */
}

#else

/* All threads copy copyprivate data from the SINGLE owner (this is the
 * version used when EE_CLUSTERIZED is defined).
 * The arguments are pointer-size pairs:
 *
 *   num: the number of (pointer, size) pairs that follow
 *   ...: num pairs; a destination pointer (void *) and a size in bytes (int)
 *
 * Each pair is handled in a separate round: an MPI broadcast rooted at the
 * owner's node, followed by a local copy which is fenced by two waits on
 * local_barrier. The order of these steps is significant.
 */
void _ort_copy_private(int num, ...)
{
	va_list     ap;
	int         size, i, mypid = ee_pid();
	void        **from, *arg;
	ort_cpriv_t *cp;
	ort_eecb_t  *me;
	/* static, i.e. a single object for all callers: it is how a thread that
	 * holds the correct value hands its address to the others */
	static void *bcast_arg;
	extern int  ncurr_work_threads, localthrid0;
	extern pthread_barrier_t local_barrier;

	if ((me = __MYCB)->num_siblings == 1)  /* Nothing here if I am solo */
		return;

	cp = &(TEAMINFO(me)->copyprivate);
	va_start(ap, num);
	from = (void **) cp->data;
	for (i = 0; i < num; i++)
	{
		arg = va_arg(ap, void *);
		size = va_arg(ap, int);
		/* give owner thread the correct value so it can use it in broadcast */
		if (cp->owner == me->thread_num)
			memcpy(arg, from[i], size);
		/* get the correct value to remote nodes; if any are in use now.
		 * Only the owner thread and the first thread of each remote node
		 * participate in the broadcast. */
		if (((cp->owner_node != mypid) && (me->thread_num == localthrid0)) ||
				((cp->owner == me->thread_num) && (me->num_siblings > ncurr_work_threads)))
			MPI_Bcast(arg, size, MPI_BYTE, cp->owner_node, OMPI_WORLD);
		/* thread that participated in broadcast has the correct value, the
		 * others must wait for it */
		if ((cp->owner == me->thread_num) || (me->thread_num == localthrid0))
			bcast_arg = arg;
		pthread_barrier_wait(&local_barrier);
		/* all threads except the owner thread and the threads that
		 * participated in the broadcast now copy the correct value */
		if ((cp->owner != me->thread_num) && (me->thread_num != localthrid0))
			memcpy(arg, bcast_arg, size);
		/* wait until everyone has the correct value before going to the
		 * next iteration (where bcast_arg will change) */
		pthread_barrier_wait(&local_barrier);
	}
	va_end(ap);

	/* Only on the owner's node: one copier less. From here on, i is reused
	 * to hold the number of copiers that are still to come. */
	if (cp->owner_node == mypid)
	{
		#if defined(HAVE_ATOMIC_FAA)
			i = _faa(&(cp->copiers), -1) - 1;
		#else
			ee_set_lock(&cp->lock);
			i = --cp->copiers;
			ee_unset_lock(&cp->lock);
		#endif
	}

	/* The last copier on the owner's node releases the array of pointers */
	if ((cp->owner_node == mypid) && (i == 0))
		free(cp->data);                    /* Free allocated data */
}
#endif
