/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* devpart.c -- OpenCL device runtime without OpenMP functionality */

/* Although we try to use OpenCL 1.2, we employ global scope variables, 
 * so we require that the device actually allows them.
 * Technically, OpenCL C 1.2 does not support such variables; however most 
 * compilers do provide the necessary support. 
 * OpenCL 2.0 supports them officially.
 * OpenCL 3.0 leaves it optional; one can check support with the following:
 * #ifdef __opencl_c_program_scope_global_variables
 */

/* This has to be done before any #includes since headers may use "double" */
#ifdef OCLC_HAS_DOUBLE
	#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#else
	#define double float
#endif

// #define DEBUG 0
//#define CL_DEVICE_MAX_WORK_GROUP_SIZE 28

/* Shorthands for the OpenCL work-item built-ins; only dimension 0 is queried */
#define __NWORKGROUPS get_num_groups(0)   /* number of work-groups */
#define __WORKGROUPID get_group_id(0)     /* id of the calling work-group */
#define __NWORKITEMS  get_local_size(0)   /* work-items per work-group */
#define __WORKITEMID  get_local_id(0)     /* id of the work-item in its group */
#define __GLOBALTHRID get_global_id(0)    /* global id of the work-item */

/* Keep global integers that hold the OpenMP device ID and the thread_limit; we
 * obtain the values from a buffer that is passed to us by the kernel wrapper.
 * We would prefer the buffer to be in constant memory but it would not work 
 * with more recent NVIDIA GPUs.
 */
__global int _assigned_device_id;      /* copied from xtrainfo[0] */
__global int _assigned_thread_limit;   /* copied from xtrainfo[1] */

/* Store the two integers found at the start of the xtrainfo buffer
 * into the program-scope variables above:
 *   xtrainfo[0] -> _assigned_device_id
 *   xtrainfo[1] -> _assigned_thread_limit
 */
void _ort_set_xtrainfo(__global int *xtrainfo)
{
	enum { XI_DEVICE_ID = 0, XI_THREAD_LIMIT = 1 };

	_assigned_thread_limit = xtrainfo[XI_THREAD_LIMIT];
	_assigned_device_id    = xtrainfo[XI_DEVICE_ID];
}


/* acceptable memset */
/* Minimal memset replacement: stores (char) c into the first n bytes of s.
 * Nothing is written when n <= 0. Unlike the standard function, it returns
 * nothing.
 */
void memset(char *s, int c, int n)
{
	const char fill = (char) c;
	int        i;

	for (i = 0; i < n; ++i)
		s[i] = fill;
}

/* Called when entering a for region; this runtime keeps no per-loop state,
 * so both arguments are ignored and nothing is done.
 */
void _ort_entering_for(int nowait, int hasordered)
{
	(void) nowait;
	(void) hasordered;
}

/* Called when leaving a for region; returns __NWORKITEMS converted to int. */
int _ort_leaving_for(void) 
{
	int nitems = (int) __NWORKITEMS;

	return nitems;
}


/* Memory fence: issues an OpenCL mem_fence() on global memory only. */
void _ort_fence()
{
	mem_fence( CLK_GLOBAL_MEM_FENCE );
	return;
}


/* taskwait is implemented as a plain barrier() with a global-memory fence;
 * the waitall argument is not used.
 */
void _ort_taskwait(int waitall)
{
	(void) waitall;
	barrier( CLK_GLOBAL_MEM_FENCE );
}


/* Barrier entry point: executes barrier() with a global-memory fence and
 * always returns 0. The argument is ignored.
 */
int _ort_barrier_me(int ignoreReason)
{
	const int result = 0;

	(void) ignoreReason;
	barrier( CLK_GLOBAL_MEM_FENCE );
	return result;
}

/* Split the iteration range [0, niters) into np contiguous, balanced chunks
 * and hand chunk number pid to the caller.
 *
 *   pid    : index of the caller among the participants (0 .. np-1)
 *   np     : number of participants sharing the iterations
 *   niters : total number of iterations
 *   fiter  : (out) first iteration of the caller's chunk
 *   liter  : (out) one past the last iteration of the caller's chunk
 *
 * The first (niters % np) participants get one extra iteration. A participant
 * left with no work receives the empty chunk [niters, niters).
 * With np <= 1 the whole range is returned, regardless of pid.
 *
 * Returns non-zero if the chunk contains at least one iteration, 0 otherwise.
 */
int _static_chunk(int pid, int np, unsigned long niters, 
                  unsigned long *fiter, unsigned long *liter) 
{
	unsigned long nparts, me, base, extra;

	/* Single (or nonsensical) participant count: caller owns everything */
	if (np <= 1)
	{
		*fiter = 0;
		*liter = niters;
		return (niters != 0);
	}

	/* Out-of-range participant: give it nothing instead of a bogus range */
	if (pid < 0 || pid >= np)
	{
		*fiter = niters;
		*liter = niters;
		return 0;
	}

	nparts = (unsigned long) np;
	me     = (unsigned long) pid;
	base   = niters / nparts;    /* iterations everybody gets */
	extra  = niters % nparts;    /* leftovers, one each to the first ones */

	/* me*base + min(me,extra) never exceeds niters, so no overflow here */
	*fiter = me * base + ((me < extra) ? me : extra);
	*liter = *fiter + base + ((me < extra) ? 1UL : 0UL);
	return (*fiter != *liter);
}

/* Chunk of [0, niters) for a distribute loop: delegates to _static_chunk()
 * using the work-group id and the number of work-groups as the participant
 * index and count. Returns whatever _static_chunk() returns.
 */
int _ort_get_distribute_chunk(unsigned long niters, 
                              unsigned long *fiter, unsigned long *liter) 
{
	int groupid = __WORKGROUPID;
	int ngroups = __NWORKGROUPS;

	return _static_chunk(groupid, ngroups, niters, fiter, liter);
}

/* Chunk of [0, niters) for a default static schedule: delegates to
 * _static_chunk() using the local work-item id and the work-group size as
 * the participant index and count. Returns whatever _static_chunk() returns.
 */
int _ort_get_static_default_chunk(unsigned long niters, 
                                  unsigned long *fiter, unsigned long *liter) 
{
	int itemid = __WORKITEMID;
	int nitems = __NWORKITEMS;

	return _static_chunk(itemid, nitems, niters, fiter, liter);
}

/* Dynamic schedule, emulated with a single static chunk per caller.
 *
 * A non-zero *liter is taken to mean that the caller has already been served
 * and 0 is returned; otherwise the default static chunk is stored in
 * *fiter / *liter. chunksize, monotonic and ignored are not used.
 * NOTE(review): this assumes the caller zeroes *liter before its first call
 * and keeps calling until 0 is returned -- confirm against the generated code.
 *
 * Returns 1 only if a non-empty chunk was handed out. Reporting an empty
 * chunk as 0 matters: *liter may stay 0 in that case (e.g. niters == 0), and
 * returning 1 would make every subsequent call return 1 as well.
 */
int _ort_get_dynamic_chunk(unsigned long niters,unsigned long chunksize,
           int monotonic,unsigned long *fiter,unsigned long *liter,int *ignored)
{
	if (*liter != 0)
		return 0;
	_ort_get_static_default_chunk(niters, fiter, liter);
	return (*fiter != *liter);
}

/* Guided schedule, emulated with a single static chunk per caller.
 *
 * A non-zero *liter is taken to mean that the caller has already been served
 * and 0 is returned; otherwise the default static chunk is stored in
 * *fiter / *liter. chunksize, monotonic and ignored are not used.
 * NOTE(review): this assumes the caller zeroes *liter before its first call
 * and keeps calling until 0 is returned -- confirm against the generated code.
 *
 * Returns 1 only if a non-empty chunk was handed out. Reporting an empty
 * chunk as 0 matters: *liter may stay 0 in that case (e.g. niters == 0), and
 * returning 1 would make every subsequent call return 1 as well.
 */
int _ort_get_guided_chunk(unsigned long niters, unsigned long chunksize,
	                    int monotonic, unsigned long *fiter, unsigned long *liter,
	                    int *ignored)
{
	if (*liter != 0)
		return 0;
	_ort_get_static_default_chunk(niters, fiter, liter);
	return (*fiter != *liter);
}

/* Entering a sections region: nothing to set up; arguments are ignored. */
void _ort_entering_sections(int nowait, int numberofsections)
{
	(void) nowait;
	(void) numberofsections;
}
/* Leaving a sections region: nothing to tear down. */
void _ort_leaving_sections()
{
}

/* Section selection stub: hands back lastsec unchanged; totalsecs is unused. */
int _ort_get_section_alt(int lastsec, int totalsecs)
{
	int section = lastsec;

	(void) totalsecs;
	return section;
}

/* Decide who executes a single region: the work-item whose local id is 0
 * always gets it. Returns 1 for that work-item and 0 for all others;
 * nowait is not used.
 */
int _ort_mysingle(int nowait)
{
	int chosen = (__WORKITEMID == 0);

	(void) nowait;
	return chosen;
}

/* Leaving a single region: no bookkeeping is needed. */
void _ort_leaving_single(void)
{
}

/* Address translation stub: the incoming pointer is returned as-is, only
 * retyped as a __global char pointer. The size argument is unused.
 */
__global char *_dev_med2dev_addr(__global void *in, unsigned long size)
{
	__global char *devaddr = (__global char *) in;

	(void) size;
	return devaddr;
}


/************************************
 *                                  *
 *        OpenMP API                *
 *                                  *
 ************************************/

 
/* Request is silently ignored. */
void omp_set_num_threads(int num_threads)
{
	(void) num_threads;
}
/* Number of threads == __NWORKITEMS. */
int omp_get_num_threads(void)
{
	return (int) __NWORKITEMS;
}
/* Maximum number of threads == __NWORKITEMS. */
int omp_get_max_threads(void)
{
	return (int) __NWORKITEMS;
}
/* Thread number == __WORKITEMID. */
int omp_get_thread_num(void)
{
	return (int) __WORKITEMID;
}
/* Number of processors is reported as __NWORKITEMS. */
int omp_get_num_procs(void)
{
	return (int) __NWORKITEMS;
}
/* Request is silently ignored. */
void omp_set_dynamic(int dyn)
{
	(void) dyn;
}
/* Always reports 0. */
int omp_get_dynamic(void)
{
	return 0;
}
/* Always reports 0. */
int omp_get_cancellation(void)
{
	return 0;
}
/* Request is silently ignored. */
void omp_set_nested(int nest)
{
	(void) nest;
}
/* Always reports 0. */
int omp_get_nested(void)
{
	return 0;
}
/* Reports the value held in _assigned_thread_limit. */
int omp_get_thread_limit(void)
{
	int limit = _assigned_thread_limit;

	return limit;
}
/* Request is silently ignored. */
void omp_set_max_active_levels(int levels)
{
	(void) levels;
}
/* Always reports 1. */
int omp_get_max_active_levels(void)
{
	return 1;
}
/* Always reports 1. */
int omp_get_supported_active_levels(void)
{
	return 1;
}
/* Always reports 0. */
int omp_get_level(void)
{
	return 0;
}
/* Team size at the given nesting level:
 *   level 0 -> 1
 *   level 1 -> __NWORKITEMS
 *   anything else -> -1
 */
int omp_get_team_size(int level)
{
	if (level == 0)
		return 1;
	if (level == 1)
		return (int) __NWORKITEMS;
	return -1;
}
/* Number of teams == __NWORKGROUPS. */
int omp_get_num_teams(void)
{
	return (int) __NWORKGROUPS;
}
/* Team number == __WORKGROUPID. */
int omp_get_team_num(void)
{
	return (int) __WORKGROUPID;
}
/* Reports 1 when __NWORKITEMS is greater than 1, otherwise 0. */
int omp_in_parallel(void)
{
	if (__NWORKITEMS > 1)
		return 1;
	return 0;
}
/* Always reports 0. */
int omp_is_initial_device(void)
{
	return 0;
}
/* Reports the value held in _assigned_device_id. */
int omp_get_device_num(void)
{
	int devid = _assigned_device_id;

	return devid;
}
