/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* ORT_PRIVE.H
 * Definitions and types needed for building ORT.
 * This is only included by ort.c.
 */

#ifndef __ORT_PRIVE_H__
#define __ORT_PRIVE_H__

#include "ort.h"
#include "ort_defs.h"
#include <ee.h>


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                       *
 *  CONSTANTS AND MACROS                                 *
 *                                                       *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Workshare region types */
#define _OMP_SINGLE   0
#define _OMP_SECTIONS 1
#define _OMP_FOR      2

/* Stores 1 in X (followed by a FENCE) only if X currently compares equal
 * to 0. This is a plain test-then-store, not an atomic test-and-set.
 * NOTE: the expansion is a bare `if` statement; do not use it as the
 * unbraced body of an if/else.
 */
#define testnotset(X) if((X)==0) {(X)=1; FENCE;}

/* Reads the int found at address X.
 * Both the argument and the whole expansion are parenthesized, so pointer
 * expressions such as `p + k` are cast as a whole and the result can be
 * combined safely with surrounding operators.
 */
#define atomic_read(X) (*((int *)(X)))

/* Busy waiting with yield.
 * Spins for as long as condition f holds. f is re-evaluated on every
 * iteration, so it should read state that other threads may modify.
 * Each time the spin counter reaches trials_before_yielding, the counter
 * is reset and ee_yield() is called before spinning resumes.
 * No FENCE is issued by this macro. The counter is volatile (presumably to
 * discourage compilers from optimizing the loop away).
 * The counter has a macro-private name so that it cannot capture an
 * identifier (e.g. one called `time`) used inside f or inside
 * trials_before_yielding.
 * The expansion remains a brace-enclosed block.
 */
#define OMPI_WAIT_WHILE(f, trials_before_yielding) { \
		volatile int ompi_ww_spins_ = 0; \
		for ( ; (f); ompi_ww_spins_++) \
			if (ompi_ww_spins_ == (trials_before_yielding)) { \
				ompi_ww_spins_ = -1; \
				ee_yield(); \
			}; \
	}


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                       *
 *  EELIB API                                            *
 *                                                       *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Every generic ee_* name used by ORT is mapped onto the symbol exported
 * by the EE library in use: othr_* for thread-based libraries, oprc_* for
 * process-based ones (selected by EE_TYPE_PROCESS).
 */
#if !defined(EE_TYPE_PROCESS)

	/* --- thread-based EELIB --- */

	/* EE-specific data (keys) */
	#define ee_key_t                  othr_key_t
	#define ee_key_create             othr_key_create
	#define ee_getspecific            othr_getspecific
	#define ee_setspecific            othr_setspecific

	/* Life cycle and EE management */
	#define ee_initialize             othr_initialize
	#define ee_finalize               othr_finalize
	#define ee_request                othr_request
	#define ee_create                 othr_create
	#define ee_yield                  othr_yield
	#define ee_waitall                othr_waitall
	#define ee_bindme                 othr_bindme
	#define ee_getselfid              othr_getselfid
	#define ee_getself                othr_getself

	/* Locks */
	#define ee_lock_t                 othr_lock_t
	#define ee_init_lock              othr_init_lock
	#define ee_destroy_lock           othr_destroy_lock
	#define ee_set_lock               othr_set_lock
	#define ee_unset_lock             othr_unset_lock
	#define ee_test_lock              othr_test_lock

	/* Barrier supplied by the EELIB */
	#if defined(AVOID_OMPI_DEFAULT_BARRIER)
		#define ee_barrier_t              othr_barrier_t
		#define ee_barrier_init           othr_barrier_init
		#define ee_barrier_destroy        othr_barrier_destroy
		#define ee_barrier_wait           othr_barrier_wait
	#endif

	/* Tasking supplied by the EELIB */
	#if defined(AVOID_OMPI_DEFAULT_TASKS)
		#define ee_start_implicit_task    othr_start_implicit_task
		#define ee_new_task               othr_new_task
		#define ee_taskwait               othr_taskwait
		#define ee_task_immediate_start   othr_task_immediate_start
		#define ee_task_immediate_end     othr_task_immediate_end
		#define ee_check_throttling       othr_check_throttling
		#define ee_set_currtask           othr_set_currtask
		#define ee_get_currtask           othr_get_currtask
		#define ee_taskenv_alloc          othr_taskenv_alloc
		#define ee_taskenv_free           othr_taskenv_free
	#endif

#else

	/* --- process-based EELIB --- */

	/* Extra API that only process-based EELIBs offer */
	#define ee_pid                    oprc_pid
	#define ee_shmfree                oprc_shmfree
	#define ee_shmalloc               oprc_shmalloc
	#define ee_shm_alloc              oprc_shm_alloc
	#define ee_shm_ptr                oprc_shm_ptr

	/* EE-specific data (keys) */
	#define ee_key_t                  oprc_key_t
	#define ee_key_create             oprc_key_create
	#define ee_getspecific            oprc_getspecific
	#define ee_setspecific            oprc_setspecific

	/* Life cycle and EE management */
	#define ee_initialize             oprc_initialize
	#define ee_finalize               oprc_finalize
	#define ee_request                oprc_request
	#define ee_create                 oprc_create
	#define ee_yield                  oprc_yield
	#define ee_waitall                oprc_waitall
	#define ee_bindme                 oprc_bindme
	#define ee_getselfid              oprc_getselfid
	#define ee_getself                oprc_getself

	/* Locks */
	#define ee_lock_t                 oprc_lock_t
	#define ee_init_lock              oprc_init_lock
	#define ee_destroy_lock           oprc_destroy_lock
	#define ee_set_lock               oprc_set_lock
	#define ee_unset_lock             oprc_unset_lock
	#define ee_test_lock              oprc_test_lock

	/* Barrier supplied by the EELIB */
	#if defined(AVOID_OMPI_DEFAULT_BARRIER)
		#define ee_barrier_t              oprc_barrier_t
		#define ee_barrier_init           oprc_barrier_init
		#define ee_barrier_destroy        oprc_barrier_destroy
		#define ee_barrier_wait           oprc_barrier_wait
	#endif

	/* Tasking supplied by the EELIB */
	#if defined(AVOID_OMPI_DEFAULT_TASKS)
		#define ee_start_implicit_task    oprc_start_implicit_task
		#define ee_new_task               oprc_new_task
		#define ee_taskwait               oprc_taskwait
		#define ee_task_immediate_start   oprc_task_immediate_start
		#define ee_task_immediate_end     oprc_task_immediate_end
		#define ee_check_throttling       oprc_check_throttling
		#define ee_set_currtask           oprc_set_currtask
		#define ee_get_currtask           oprc_get_currtask
		#define ee_taskenv_alloc          oprc_taskenv_alloc
		#define ee_taskenv_free           oprc_taskenv_free
	#endif

#endif

/* Unless AVOID_OMPI_DEFAULT_BARRIER is defined, the ee_barrier_* names
 * resolve to ORT's own default barrier.
 */
#ifndef AVOID_OMPI_DEFAULT_BARRIER
	#define ee_barrier_t          ort_defbar_t
	#define ee_barrier_init       default_barrier_init
	#define ee_barrier_destroy    default_barrier_destroy
	#define ee_barrier_wait       default_barrier_wait
#endif


/* Guard for EE libraries that can only offer nested parallelism when
 * dynamic adjustment is on: if n (nested) is set, d (dynamic) is clear and
 * the library does not report supports_nested_nondynamic, a warning is
 * printed and n is reset to 0.
 * n must be an lvalue; the expansion is a bare `if` statement.
 */
#define check_nested_dynamic(n,d)\
	if ((n) && !(d) && !ort->eecaps.supports_nested_nondynamic) {\
		ort_warning(\
			"the EE library reports that nested and NOT dynamic\n"\
			"   parallelism cannot be supported.\n"\
			"   Try enabling dynamic adjustment using either of:\n"\
			"    >> OMP_DYNAMIC environmental variable, or\n"\
			"    >> omp_set_dynamic() call.\n\n"\
			"*** disabling support for nested parallelism for now ***\n"\
			"[end of ORT warning]\n"\
		);\
		(n) = 0;\
	}

#if defined(OMPI_OMP_EXT)
/*
 * Tag support: one element of a singly-linked list of tags.
 */
typedef struct tag_s
{
	char           *tag_string;                  /* The tag itself */
	struct tag_s   *next_tag;                    /* Link to the next tag */
	xsched_data_t   xsched;
	int             nthreads[MAX_NUMTHR_LEVELS];
	int             set_nthrlevs_tag;
} tag_t;
#endif


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                       *
 *  TYPES                                                *
 *                                                       *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/*
 * Types/macros related to tasks
 */
/* Shorthand for the taskqueuesize field of the global `ort` structure */
#define TASKQUEUESIZE (ort->taskqueuesize)

#if !defined(AVOID_OMPI_DEFAULT_TASKS)

/* Stand-in for the leading members of struct Node (func up to the optional
 * lock). Its sizeof is what struct Node uses to size its padding array, so
 * the two member lists have to be kept in step.
 */
struct half_node
{
	void                 *(*func)(void *);  /* Task function */
	struct Node          *parent;           /* Task's parent id */
	volatile struct Node *next;             /* For use in garbage collector */
	int                   isfinal;          /* OpenMP 3.1 */
	void                 *taskgroup;        /* OpenMP 4.0 */
	void                 *taskscope;        /* OpenMP 5.0 */
#ifndef HAVE_ATOMIC_FAA
	ee_lock_t             lock;             /* Lock queue for atomic access */
#endif
};

/* Descriptor of a single task reduction variable. thrvar is a table with
 * the private copies, one entry for each thread that participated.
 */
typedef struct {
	void  *origvar;                              /* The original variable */
	int    size;                                 /* Its size, in bytes */
	void (*combiner)(void *, void *, int size);  /* How to reduce */
	void **thrvar;            /* Thread-private copies (1 pointer per thread) */
} taskred_t;


/* A taskgroup: linked to its parent taskgroup, plus a cancellation flag */
typedef struct taskgroup_s
{
	struct taskgroup_s *parent;       /* The parent taskgroup */
	volatile int        is_canceled;  /* Set when the taskgroup is canceled */
} taskgroup_t;


/* A taskscope: linked to its parent taskscope and carrying an array of
 * task reduction variables.
 */
typedef struct taskscope_s
{
	struct taskscope_s *parent;   /* The parent taskscope */
	int                 numreds;  /* How many task reduction variables */
	taskred_t          *reds;     /* The task reduction variables */
} taskscope_t;


/* A task node.
 * The members up to (and including) the optional lock are mirrored by
 * struct half_node, whose size determines the padding array that follows
 * them; the remaining members come after the padding.
 */
typedef struct Node
{
	void            *(*func)(void *);    /* Task function */
	struct Node     *parent;             /* Task's parent id */
	struct Node     *next;               /* For use in garbage collector */
	int              isfinal;            /* OpenMP 3.1 */
	taskgroup_t     *taskgroup;          /* OpenMP 4.0 */
	taskscope_t     *taskscope;          /* OpenMP 5.0 */
#if !defined(HAVE_ATOMIC_FAA)
	ee_lock_t        lock;               /* Lock queue for atomic access */
#endif

	/* Padding: CHAR_PAD_CACHE(A) is the distance, in chars, from A to the
	 * next multiple of CACHE_LINE. When A already is a multiple the result
	 * is a full CACHE_LINE, so the array is never zero-sized.
	 * The parameter is parenthesized so that compound size expressions
	 * (e.g. a sum of sizeofs) are reduced modulo CACHE_LINE as a whole.
	 */
#define CHAR_PAD_CACHE(A) ((CACHE_LINE - ((A) % CACHE_LINE))/sizeof(char))
	char             pad[CHAR_PAD_CACHE(sizeof(struct half_node))];
	void            *funcarg;        /* Task function argument */
	volatile int     num_children;   /* Number of task's descendants */
	/* Whether I inherited the task node from my parent */
	int              inherit_task_node;
	volatile int     occupied;
	int              rtid;           /* Special task id (nested loop to task) */
	ort_task_icvs_t  icvs;           /* OpenMP3.0 */

	void            *dependencies;   /* OpenMP 4.0 task dependencies (opaque) */
#ifdef OMPI_OMP_EXT
	tag_t           *tag_stack;
#endif
} ort_task_node_t;


/* Per-thread pool of task nodes, dedicated to one task function (so there
 * is one pool for every task region in the program).
 * For speed it keeps a number of pre-allocated nodes (= task queue size +
 * # threads in the team + 3) as well as a recycling list.
 * All pools of a thread are chained in a list through `next`.
 * Every node in the pool allocates a memory area for the data environment
 * of the corresponding task function; that pointer is stored hidden.
 */
typedef struct tnode_pool_s
{
	void                *(*task_func)(void *);  /* to identify the size */
	ort_task_node_t      *prealloced;    /* taskqueue size + team size + 3 */
	ort_task_node_t      *recycler;      /* recycling list */
	struct tnode_pool_s  *next;          /* next pool of the same thread */
} ort_task_node_pool_t;


/* A task queue: top/bottom indices over a table of task node pointers */
typedef struct queue_s
{
	volatile int      top;     /* Volatile so that it can be read atomically */
	volatile int      bottom;
	ort_task_node_t **tasks;
#if !(defined(HAVE_ATOMIC_FAA) && defined(HAVE_ATOMIC_CAS))
	ee_lock_t         lock;    /* for locking the queue */
#endif

	/* Points to the task counters of my children's implicit tasks */
	volatile int     *implicit_task_children;
} ort_task_queue_t;


/* Tasking state kept by each thread */
typedef enum {
	TIDLE = 0,
	TEMPTY,
	TBUSY
} tstatus_e;

typedef struct
{
	/* -- Thread's private data -- */

	/* The task I am currently executing; needed in task wait */
	ort_task_node_t      *current_executing_task;
	/* Task environment pool */
	ort_task_node_pool_t *task_node_pool;
	/* Maximum number of mates in my team */
	int                   max_mates;
	/* OpenMP 4.0: used to identify the closely nested implicit task */
	ort_task_node_t      *current_implicit_task;
	/* New stuff */
	tstatus_e             status;

#if defined(ORT_DEBUG)
	/* -- Statistics, only kept in debug builds -- */
	long tasks_enqueued;
	long tasks_executed_by_worker;
	long tasks_executed_by_thief;
	long immediate_execution_due_to_full_pool;
	long immediate_execution_due_to_full_task_queue;
	long immediate_execution_due_to_new_task_exec;
	long throttled;           /* Executed immediately */
	long throttled_pool;      /*    .. due to full pool */
	long throttled_queue;     /*    .. due to full task queue */
	long throttled_if;        /*    .. due to if(FALSE) clause */
	long throttled_final;     /*    .. due to final(TRUE) clause */
	long throttled_serial;    /*    .. outside of parallel */
	long in_throttle;         /* # times I got in throttling mode */
	long out_throttle;        /* # times I got out of throttling */
	long fail_theft_attemts;  /* # times tried to steal from empty queue */
	long empty2idle;          /* # times I went from TEMPTY to TIDLE */
	long idle2empty;          /* # times I went from TIDLE to TEMPTY */
#endif
} ort_tasking_t;

#else

/* Hold data for task implementation */
/* Reduced variant, used when AVOID_OMPI_DEFAULT_TASKS is defined: only the
 * currently executing task is recorded.
 * NOTE(review): ort_task_node_t is declared in the other branch of this
 * conditional; this branch presumably relies on another header to provide
 * the type -- confirm.
 */
typedef struct
{
	/* Thread's private data */
	/* Have to know what task i currently execute. Needed in task wait */
	ort_task_node_t *current_executing_task;
} ort_tasking_t;

#endif


/*
 * Other types
 */


/* Doacross loops configuration:
 * !DOACROSS_FAST: lowest memory requirements, only works for static schedules
 *  DOACROSS_FAST: fastest, works for all schedules, high memory consumption
 * The macro is defined unconditionally here; ort_forloop_t tests it to
 * choose between its `mapit` member and its `schedtype`/`chsize` members.
 */
#define DOACROSS_FAST

/* Bookkeeping for FOR loops */
typedef struct
{
	int                    hasordered;  /* flag for plain ordered clause */
	int                    ordnum;      /* ordered(num) - only used for doacross */
	int                    colnum;      /* collapse(num) - ditto */
	int                    niters;      /* total # iterations (after collapsing) */
	volatile u_long        iter;        /* The next iteration to be scheduled */
	volatile u_long       *curriter;    /* Current iteration for each child */
#if defined(EE_TYPE_PROCESS)
	volatile int           curriter_shmid;  /* Memory segment ID */
#endif
#if defined(DOACROSS_FAST)
	volatile unsigned int *mapit;       /* Doacross iteration bitmap */
#else
	int                    schedtype;
	int                    chsize;
#endif
#if defined(OMPI_XTRA_LOOPSCHEDS)
	volatile int           xtra_int;    /* Used by some of the new schedule techniques */
#endif
} ort_forloop_t;


/* State of one workshare region */
typedef struct
{
	ee_lock_t     reglock;       /* Lock for the region */
	volatile int  empty;         /* True if no thread entered yet */
	volatile int  left;          /* # threads that have left the region */
	int           inited;        /* 1 if the region was initialized */

	/* Data specific to SECTIONS and FOR regions */
	volatile int  sectionsleft;  /* Remaining # sections to be given away */
	ort_forloop_t forloop;       /* Stuff for FOR regions */
} wsregion_t;


/* The set of workshare regions that may be active at the same time */
typedef struct
{
	/* The region used for BLOCKING constructs (those without NOWAIT) */
	wsregion_t   blocking;

	/* Tracking of the active NOWAIT regions; the two indices are volatile
	 * because all threads watch them.
	 */
	volatile int headregion;
	volatile int tailregion;
	wsregion_t   active[MAX_ACTIVE_REGIONS];
} ort_workshare_t;


/* Copyprivate support: holds pointers to the copyprivate variables */
typedef struct
{
	volatile void **data;
	int             owner;
#if defined(EE_CLUSTERIZED)
	int             owner_node;  /* node in which the owner thread resides */
#endif
	int             copiers;
	ee_lock_t       lock;
} ort_cpriv_t;


/* Table with the key-value pairs for threadprivate variables */
typedef struct
{
	int    alloted;  /* how many entries the vars table has */
	void **vars;     /* the table itself */
} ort_tptable_t;


/* Master Control Block Fields (MCBF).
 * Everything an EE needs while it acts as the master of a team; these
 * members used to live directly in ort_eecb_t.
 */
typedef struct ort_eecb_s ort_eecb_t;
typedef struct ort_mcbf_s ort_mcbf_t;
struct ort_mcbf_s
{
	ee_barrier_t     *barrier;       /* Barrier for my children */
	void             *redinfo;       /* Reduction data/results of my kids */
	int               num_children;
#if !defined(EE_TYPE_PROCESS)
	ttkfunc_t         workfunc;      /* The func pointer executed by my children */
#else
	int               workfunc;      /* The func ID executed by my children */
#endif
	ort_workshare_t   workshare;     /* Some fields volatile since children snoop */

	/* -- Threadprivate -- */
	ort_cpriv_t       copyprivate;   /* For copyprivate; the owner stores data
	                                    here and the rest of the children grab
	                                    it from here */
	ort_tptable_t    *tpkeys;        /* Threadprivate vars of my children */
	int               tpksize;       /* in essence, max # children ever created */

	/* -- Cancellation: flags that signal cancellation -- */
	volatile int      cancel_par_active;
	volatile int      cancel_sec_active;
	volatile int      cancel_for_active;

#if !defined(AVOID_OMPI_DEFAULT_TASKS)
	/* -- Tasking -- */
	ort_task_queue_t *queues;        /* task queues, one for each child thread */
	int               numqueues;     /* # queues in the above table */
	volatile int      at_least_one_task;  /* true if >=1 tasks created by the team */
	int               nonidle;       /* used for end-of-work consensus */
#endif

	omp_proc_bind_t   bind_override; /* _false when no proc_bind clause exists */
};


/* Execution entity control block (eecb).
 * One such block is kept by ORT for every ee, with whatever is needed for
 * runtime bookkeeping. The eecbs are organized as a tree: each child ee
 * points to the eecb of its parent.
 */
struct ort_eecb_s
{
	/* -- What I need when I become a parent -- */
	ort_mcbf_t    *mf;             /* The fields that will be used by my children */
	ort_eecb_t    *me_master;      /* Temp eecb storage for me as master */

	/* -- What I need as a member of a team -- */
	ort_eecb_t    *parent;         /* this is also used for recycling freed EECBs */

	int            cancel_sec_me;  /* Cancel flags in serial region */
	int            cancel_for_me;
	int            thread_num;     /* Thread id within the team */
	int            num_siblings;   /* # threads in my team */
	int            level;          /* At what level of parallelism I lie */
	int            activelevel;    /* At what *active* level of parallelism I lie */
	void          *shared_data;    /* Pointer to shared struct of current function */
	ort_eecb_t    *sdn;            /* Where I will get shared data from; normally
	                                  from my parent, except at a false parallel
	                                  where I get it from myself since I am
	                                  the only thread to execute the region. */
	ort_eecb_t    *me_partask;     /* When I run a task from parallel for */
	int            mynextNWregion; /* Non-volatile; I'm the only thread to use this */
	int            nowaitregion;   /* True if my current region is a NOWAIT one */
	int            currplace;      /* The place I'm currently executing
	                                  (place_partition index) */
	int            pfrom;          /* Current place subpartition interval: */
	int            pto;            /*   [pfrom, pto] */
	int            cgid;           /* The contention group I belong to (>= -1) */

	/* -- Tasking structures -- */
	taskgroup_t   *tg_recycler;    /* Taskgroup recycler */
#if !defined(AVOID_OMPI_DEFAULT_TASKS)
	ort_tasking_t  tasking;
#endif

	/* -- Thread-library specific data -- */
	void          *ee_info;        /* Handled by the ee library */
};


/* Linked list with pointers to the user's shared global variables
 * (only if ee=proc).
 */
typedef struct ort_sglvar_s ort_sglvar_t;
struct ort_sglvar_s
{
	void         **varptr;     /* Points to the user's global variable */
	int            size;       /* sizeof(var) */
	void          *initvalue;  /* The initializer */
	ort_sglvar_t  *next;       /* Next list element */
};

/* Dequeue orders */
#define LIFO        0
#define FIFO        1

/* Three-valued settings */
#define FALSE       0
#define TRUE        1
#define AUTO        2


/* League of teams (OpenMP 4.5/5.0).
 * A NULL cg_inithr signifies the very initial team.
 */
typedef struct
{
	int             numteams;           /* # initial teams participating in the league */
	int             threadlimit;        /* max # threads in contention groups */
	void         *(*func)(void *);      /* The function of initial threads */
	void           *funcarg;            /* The argument to the function */
	int volatile   *cg_size;            /* # active threads per contention group */
	ort_eecb_t    **cg_inithr;          /* the initial threads of the contention groups */
} league_t;


/* The global state of ORT, gathered in a single structure; if ee=proc,
 * this is also placed in shared memory.
 */
typedef struct
{
	/* -- General -- */
	ort_icvs_t          icvs;                /* Global ICVs */
	ort_caps_t          eecaps;              /* Capabilities of the EELIB */
	int                 embedmode;           /* To suppress some printouts */

	/* -- Modules and devices -- */
	ort_module_t        module_host;         /* Module to use host as a device */
	ort_module_t       *modules;             /* Device modules (except host) */
	int                 num_modules;
	int                 num_local_modules;
	int                 num_remote_modules;
	ort_device_t       *ort_devices;         /* not working with ee_process yet */
	ort_device_t      **hostdevs;            /* only host devices */
	int                 num_devices;         /* includes the host device */
	int                 num_local_devices;
	int                 num_remote_devices;
	int                *device_status;       /* used for device scheduling */

	/* -- Locks and eecb management -- */
	volatile ee_lock_t  atomic_lock;         /* Global lock for atomic */
	volatile ee_lock_t  preparation_lock;    /* For initializing user locks */
	volatile ee_lock_t  eecb_rec_lock;
	volatile ee_lock_t  host_kernel_lock;    /* For executing kernels */
	ort_eecb_t         *eecb_recycler;       /* not working with ee_process yet */
	ort_eecb_t         *initial_eecb;        /* the very initial eecb */

	/* -- Threadprivate, nthreads[] and binds[] lists -- */
	int                 thrpriv_num;         /* # threadprivate variables */
	int                 nthr_per_level[MAX_NUMTHR_LEVELS];  /* nthreads[] list */
	omp_proc_bind_t     bind_per_level[MAX_BIND_LEVELS];    /* binds[] list, v4.0.0 */
	int                 set_nthrlevs;        /* # levels of nthreads defined */
	int                 set_bindlevs;        /* presumably # levels of binds
	                                            defined -- TODO confirm */

	/* -- Tasking configuration -- */
	int                 task_dequeue_policy; /* Dequeueing from my Q? FIFO:LIFO */
	int                 taskqueuesize;       /* Size of task queues */
	int                 dynamic_taskqueuesize;  /* Adapt task queue size */
	int                 partotask_policy;    /* Nested parallel-for/sections2task */

	/* -- Places, league, program arguments -- */
	int               **place_partition;     /* OpenMP4 */
	league_t            league;              /* The current league */
	int                *argc;                /* Pointer to main's argument */
	char             ***argv;                /* Pointer to main's argument */
} ort_vars_t;


/* True while the league has no table of initial threads */
#define ISINITLEAGUE()    (NULL == ort->league.cg_inithr)
/* The cg_size entry of contention group cgid (usable as an lvalue) */
#define CG_SIZE(cgid)     (ort->league.cg_size[(cgid)])

/* True if enabled */
#define CANCEL_ENABLED()  (ort->icvs.cancel)

/* The master control block fields reached through the eecb's sdn pointer */
#define TEAMINFO(eecb)    (((eecb)->sdn)->mf)


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                       *
 *  VARIABLES & MORE MACROS                              *
 *                                                       *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* The global ORT variables */
extern ort_vars_t *ort;

/* My eecb: __SETMYCB(v) records v as the calling EE's eecb and __MYCB
 * yields it back as an (ort_eecb_t *).
 */
#if defined(USE_TLS)
	/* FIXME: This only works for pthreads */
	extern TLS_KEYWORD void *myeecb;
	#define __MYCB       ((ort_eecb_t *) myeecb)
	#define __SETMYCB(v) (myeecb = (v))
#else
	extern ee_key_t eecb_key;
	#define __MYCB       ((ort_eecb_t *) ee_getspecific(eecb_key))
	#define __SETMYCB(v) ee_setspecific(eecb_key, (v))
#endif

/* Accessors for the current (implicit) task of an eecb.
 * With the default tasking layer they touch eecb->tasking directly;
 * otherwise they go through the EE library's get/set_currtask calls.
 * NOTE(review): __CURRIMPLTASK has no definition in the #else branch --
 * confirm whether that is intentional.
 */
#ifndef AVOID_OMPI_DEFAULT_TASKS
	#define __CURRTASK(eecb) \
		((eecb)->tasking.current_executing_task)
	#define __SETCURRTASK(eecb,task) \
		((eecb)->tasking.current_executing_task = (task))
	#define __CURRIMPLTASK(eecb) \
		((eecb)->tasking.current_implicit_task)
	#define __SETCURRIMPLTASK(eecb,task) \
		((eecb)->tasking.current_implicit_task = (task))
	#define __INHERITASK(eecb) \
		((eecb)->tasking.current_executing_task->inherit_task_node)
	#define __FINALTASK(eecb) \
		((eecb)->tasking.current_executing_task->isfinal)
#else
	#define __CURRTASK(eecb) \
		(ee_get_currtask(&(eecb)->ee_info, (eecb)->thread_num))
	#define __SETCURRTASK(eecb,task) \
		ee_set_currtask(&(eecb)->ee_info, (task))
	#define __SETCURRIMPLTASK(eecb,task) \
		ee_set_currtask(&(eecb)->ee_info, (task))
	#define __INHERITASK(eecb) \
		(ee_get_currtask(&(eecb)->ee_info, (eecb)->thread_num)->inherit_task_node)
	#define __FINALTASK(eecb) \
		(ee_get_currtask(&(eecb)->ee_info, (eecb)->thread_num)->isfinal)
#endif


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                         *
 *  FUNCTIONS etc (also used by the parser, see ort.defs)  *
 *                                                         *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


#ifdef EE_TYPE_PROCESS
	/* Shared memory */
	void      ort_shmalloc(void **p, int size, int *upd);
	void      ort_shmfree(void *p);
	void      ee_shmfree(int *p);

	/* Thread function <-> id translation */
	int       thrfunc_get_id(ttkfunc_t func);
	ttkfunc_t thrfunc_get_ptr(int fid);

	/* Task function <-> id translation */
	int       taskfunc_get_id(ttkfunc_t func);
	ttkfunc_t taskfunc_get_ptr(int fid);

	/* Registration and cleanup of thread/task functions */
	void      thrtask_funcs_cleanup();
	void      _ort_register_thrfunc(ttkfunc_t func);
	void      _ort_register_taskfunc(ttkfunc_t func);
#endif

/* Initialization / finalization */
int         _ort_init(int *argc, char ***argv, int embedmode, int nmodules, ...);
void        _ort_finalize(int exitval);

/* eecb handling */
ort_eecb_t *eecb_initial_prepare(ort_eecb_t *cb);
void        free_initial_eecb(ort_eecb_t *cb);
void       *eecb_alloc_temp();
void        eecb_free_temp(ort_eecb_t *eecb);
void       *mcbf_alloc(void);

/* Miscellaneous */
void       *ort_calloc(int size);
void        ort_prepare_omp_lock(omp_lock_t *lock, int type);
void        execute_kernel_on_host(void *(*func)(void *), void *shared,
                                   int nt, int tl);

/*
 * From affinity.c
 */
void    ort_set_affinity_format(const char *format);
char   *ort_get_default_affinity_format(void);
size_t  ort_get_affinity_format(char *buffer, size_t size);
void    ort_display_affinity(const char *format);
size_t  ort_capture_affinity(char *buffer, size_t size, const char *format);

/*
 * From barrier.c
 */
void default_barrier_wait_in_parallel(ort_defbar_t *bar,
                                      int eeid);

/*
 * From cancel.c
 *
 * Each macro yields the relevant cancellation flag for eecb `me`.
 * For for/sections the flag of the team is consulted when `me` has a
 * parent; otherwise the eecb's own flag is used.
 */
#define check_cancel_parallel(me) \
	(TEAMINFO(me)->cancel_par_active)
#define check_cancel_for(me) \
	(!((me)->parent) ? (me)->cancel_for_me : TEAMINFO(me)->cancel_for_active)
#define check_cancel_sections(me) \
	(!((me)->parent) ? (me)->cancel_sec_me : TEAMINFO(me)->cancel_sec_active)
#define check_cancel_taskgroup(me) \
	(__CURRTASK(me)->taskgroup && __CURRTASK(me)->taskgroup->is_canceled)

/*
 * From env.c
 */
void  env_get_environment(void);
void  env_display_vars(int);
void  env_read_auto_schedule(char *, xsched_data_t *);
void  env_read_tag_threads(char *, int *, int *);

/*
 * From hostdev.c
 */
ort_module_t  *hostdev_get_module();

/*
 * From modules.c
 */

/* The index of the host device */
#define HOSTDEV_IDX        0
/* True for ids from -1 up to (but excluding) ort->num_devices */
#define IS_OMPDEVID(id)    (!((id) < -1 || (id) >= ort->num_devices))
/* True for id -1 and for id ort->num_devices-1 */
#define IS_HOST_DEVID(id)  ((id) == -1 || (id) == (ort->num_devices - 1))

/* Translation between OpenMP device ids and ORT device indices */
int  ort_devidx(int ompdevid);
int  ort_ompdevid(int ortdevidx);

/* Device lookup and error reporting */
int  ort_illegal_device(char *reason, int devid);
ort_device_t *ort_get_device(int dev_id);

/* Discovery, initialization, finalization */
void ort_discover_modules(int nmodules, char **modnames);
void ort_init_device(int device_id);
void ort_finalize_devices();
void reduce_device_status(int gID);

/*
 * From places.c
 */
void   places_get_default(void);
void   places_getenv(char *s);
void   places_show();
char  *places_get_list_str(int pfrom, int pto);
void   spread_subpartition(int tid, int nthreads, int startplace,
                           int pfrom, int pto,
                           int *newfrom, int *newto);
void   bindme(int eeid, ort_eecb_t *t, ort_eecb_t *parent);

/*
 * From reduction.c
 */
void  reds_init(ort_eecb_t *me, int teamsize);
void  reds_finalize(ort_eecb_t *me);

/*
 * From target.c
 */
void   target_decltarg_initialize();
void  *target_checkedmapped_var(void *var, ort_device_t *d, int *how);
int    target_associate_ptr(void *hostaddr, void *umedaddr, u_long size,
                            u_long devoff, int devid);
int    target_disassociate_ptr(void *hostaddr, int devid);

/*
 * From teams.c
 */
void  league_initial();
void  teams_initial_thread_work(int teamid);

/*
 * From taskdeps.c
 */
void  tdeps_issue_task(ort_task_node_t *tnode, void **deparray,
                       int out, int in, int inout);
void  tdeps_after_execution(ort_task_node_t *tnode, void *me);
void  tdeps_free_tdepinfo(void *dependencies);

/*
 * From tasks.c & tasks_spw.c
 */

/* Setup */
void tasking_init();
void taskqueues_init(ort_eecb_t *me, int nthr);

/* "spw" tasks */
void spwtasks_execute_node(ort_eecb_t *me, ort_task_node_t *tnode);
void spwtasks_create(ort_eecb_t *me, int ntasks, int offs,
                     void *(*f)(void*));

/* Task execution and implicit tasks */
void ort_task_execute_this(ort_eecb_t *me, ort_task_node_t *task_to_execute);
void ort_create_task_immediate_node(ort_eecb_t *thr);
void ort_start_implicit_task(ort_eecb_t *thr);
void ort_finish_implicit_task(ort_eecb_t *thr);
#ifndef AVOID_OMPI_DEFAULT_TASKS
void ort_enqueue_task(ort_eecb_t *me, ort_task_node_t *tnde);
#endif
void finish_all_team_tasks(ort_eecb_t *me);

/* Tasking entry points called by compiler-generated code */
void   _ort_new_task(void *(*func)(void *arg),
                     void *arg,
                     int now, int final, int untied, int priority,
                     void **deparray,
                     int noutdeps, int nindeps, int ninoutdeps);
int    _ort_task_throttling(void);
void   _ort_taskwait(int waitall);
void  *_ort_task_immediate_start(int final);
void   _ort_task_immediate_end(void *tn);

/*
 * From tasks_mem.c
 */

/* Task nodes */
ort_task_node_t *ort_task_alloc(void *(*func)(void *), void *arg);
ort_task_node_t *ort_task_alloc_init(void *(*func)(void *), void *arg,
                                     int final, int rtid, ort_eecb_t *thr);
void ort_task_free(ort_eecb_t *thr, ort_task_node_t *node);

/* Taskgroups */
taskgroup_t *taskgroup_alloc(void);
void taskgroup_free(taskgroup_t *arg);

/* Pools */
void task_pools_init(ort_eecb_t *t);

/* Task environment allocation; called by compiler-generated code */
void *_ort_taskenv_alloc(int size,
                         void *(*task_func)(void *));

/*
 * From worksharing.c
 *
 * Workshare-related functions
 */
void         init_workshare_regions(ort_eecb_t *me);
wsregion_t  *my_wsregion(ort_eecb_t *me);

#endif     /* __ORT_PRIVE_H__ */
