/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/


// #define DBGPRN_FORCE
// #define DBGPRN_BLOCK
#define DBGPRN_FILTER DBG_ROFF_NODEMGR

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include "ort_prive.h"
#include "assorted.h"

#ifdef OMPI_REMOTE_OFFLOADING

#include "remote/roff.h"
#include "remote/node_manager.h"
#include "remote/memory.h"
#include "remote/workers.h"
#include "remote/workercmds.h"
#include "remote/roff_prive.h"
#include "roff_config.h"
#include "rt_common.h"


#define DBGTERMINAL "/usr/bin/xterm" /* xterm used for debugging */

enum noderoletype node_role = ROLE_PRIMARY; /* default role */
int num_primary_devs = 0;

static void ***devinfos; /* Used by the primary node to store all devinfos */
void *comminfo;

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *                                                                   *
 * NODE HANDLING                                                     *
 *                                                                   *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/* Returns the number of configured remote nodes */
int roff_man_get_num_nodes(void)
{
	return roff_config.num_nodes;
}


char *roff_man_get_nodename(int node_id)
{
	return roff_config.nodes[node_id].name;
}


void roff_man_finalize(void)
{
	int i, num_nodes = roff_man_get_num_nodes();
	
	roff_config_finalize();
	
	for (i = 0; i < num_nodes; ++i)
		free(devinfos[i]);
	free(devinfos);
	devinfos = NULL;
	
	DBGPRN((stderr, "[remote] closing; roff_man_finalize was called\n"));
}


/* Returns the ID of the calling node */
int roff_man_get_my_id(void)
{
	return Comm_Get_id(comminfo);
}


/* This is called by the host and the other nodes (when spawned)
 * from _ort_init().
 */
void roff_man_prepare_node(int *argc, char ***argv)
{   
	char *rolestr;
	comminfo = Comm_Init(argc, argv);

	rolestr = (node_role == ROLE_WORKER) ? "worker" : "primary"; /* set after Comm_Init */

	DBGPRN((stderr, "[remote %s %d] >>> roff_man_prepare_node: initializing comm layer\n", 
				    rolestr, getpid()));

	if (comminfo == NULL)
		ort_error(1, "[remote %s %d] >>> roff_man_prepare_node: communication layer could not be initialized; exiting.\n",
		             rolestr, getpid());

	if (node_role == ROLE_WORKER)
		roff_worker_init();
}


int roff_man_get_device_chunk(int num_devices, int num_siblings, int ee_id, 
                              int *first_device_id, int *last_device_id)
{
	int    N = num_siblings, myid = ee_id;
	u_long chunksize;

	if (N == 1)
	{
		*first_device_id = 0;
		*last_device_id = num_devices;
		return (*first_device_id != *last_device_id);
	}
	if (num_devices <= N)    /* less iterations than threads */
	{
		*first_device_id = myid;
		*last_device_id = (myid < num_devices) ? myid + 1 : myid;
		return (*first_device_id != *last_device_id);
	}

	chunksize = num_devices / N;
	num_devices = num_devices % N;
	if (num_devices) chunksize++;     /* first num_devices threads get this chunksize */

	if (myid < num_devices || num_devices == 0)       /* I get a full chunk */
	{
		*first_device_id = myid * chunksize;
		*last_device_id = *first_device_id + chunksize;
	}
	else                                  /* I get a smaller chunk */
	{
		*first_device_id = num_devices * chunksize + (myid - num_devices) * (chunksize - 1);
		*last_device_id = *first_device_id + (chunksize - 1);
	}
	return (*first_device_id != *last_device_id);
}

#ifdef ROFF_MULTIPLE_WORKERS

/* The primary node initially retrieves the number of devices installed 
 * at each worker node, from the configuration. Then, from each node, 
 * it receives an array containing the devinfos of all its local devices.
 */
static void _primary_receive_all_devinfos(void)
{
	int i, w, j, tabidx = 0, numdevs, numworkers, num_nodes = roff_man_get_num_nodes();
	int device_id_in_node, device_id;
	roff_devinfo_t *item;

	DBGPRN((stderr, "[remote primary %d] >>> Receiving devinfos...\n", getpid()));
	num_primary_devs = ort->num_local_devices;

	/* Receive devinfos from all nodes and add them to the map */
	for (i = 0; i < num_nodes; i++)
	{
		numdevs = roff_config.nodes[i].total_num_devices;
		numworkers = (ROFF_NUM_NODE_WORKER_PROCS == -1) ? numdevs : ROFF_NUM_NODE_WORKER_PROCS;

		/* Let the node know how many devices I have */
		_Comm_Send_1int(&num_primary_devs, i+1, 0);

		devinfos[i] = (void **) smalloc(numdevs * sizeof(void *));

		int offset = 0;

		for (w = 0; w < numworkers; w++)
		{
			int fid, lid;
			roff_man_get_device_chunk(numdevs, numworkers, w, &fid, &lid);

			/* Add the received devinfos to the devinfo table.
			 * For each device, we also store the intranode device ID,
			 * the intramodule device ID and the node ID.
			 */
			Comm_Recv(comminfo, (i * numworkers) + w + 1, COMM_BYTE, 
			          &(devinfos[i][offset]), (lid-fid) * sizeof(void*), 
			          COMM_ANY_TAG, NULL);

			DBGPRN((stderr, "[remote primary %s] >>> Received %d infos from worker %d, stored at %d (max=%d) (fid=%d, lid=%d)\n",
					Comm_Get_info(comminfo), lid-fid, (i*numworkers) + w + 1, offset, numdevs, fid, lid));

			offset += (lid - fid);
		}
		
		
		/* Device order in .ompi_remote_devices must be equal to the
		 * actual device order in the worker node. The CPU device must be 
		 * detected and always be placed at 0.
		 */
		for (device_id_in_node = 0, j = 0; j < roff_config.nodes[i].num_modules; j++)
		{
			for (device_id = 0; device_id < roff_config.nodes[i].modules[j].num_devices; 
			     device_id_in_node++, device_id++, tabidx++)
			{
				item = &(devinfotab[tabidx]);
				item->device_id_in_node = device_id_in_node;
				item->device_id_in_module = device_id;
				item->global_device_id = tabidx + ort->num_local_devices;
				item->node_id = i + 1;
				item->devinfo = devinfos[i][device_id_in_node];
				item->modulename = roff_config.nodes[i].modules[j].name;
				item->alloc_table = roff_alloctab_new(item->global_device_id);
			}
		}
	}

	DBGPRN((stderr, "[remote primary %d] >>> Received all devinfos\n", getpid()));
}

#else

/* The primary node initially retrieves the number of devices installed 
 * at each worker node, from the configuration. Then, from each node, 
 * it receives an array containing the devinfos of all its local devices.
 */
static void _primary_receive_all_devinfos(void)
{
	int i, j, tabidx = 0, numdevs, num_nodes = roff_man_get_num_nodes();
	int device_id_in_node, device_id;
	roff_devinfo_t *item;

	DBGPRN((stderr, "[remote primary %d] >>> Receiving devinfos...\n", getpid()));
	num_primary_devs = ort->num_local_devices;

	/* Receive devinfos from all nodes and add them to the map */
	for (i = 0; i < num_nodes; i++)
	{
		numdevs = roff_config.nodes[i].total_num_devices;
		devinfos[i] = (void**) smalloc(numdevs * sizeof(void*));

		/* Let the node know how many devices I have */
		_Comm_Send_1int(&num_primary_devs, i+1, 0);

		/* Add the received devinfos to the devinfo table.
		 * For each device, we also store the intranode device ID,
		 * the intramodule device ID and the node ID.
		 */
		Comm_Recv(comminfo, i+1, COMM_BYTE, devinfos[i], numdevs*sizeof(void*), 
		          COMM_ANY_TAG, NULL);
		
		/* Device order in .ompi_remote_devices must be equal to the
		 * actual device order in the worker node. The CPU device must be 
		 * detected and always be placed at 0.
		 */
		for (device_id_in_node = 0, j = 0; j < roff_config.nodes[i].num_modules; j++)
		{
			for (device_id = 0; device_id < roff_config.nodes[i].modules[j].num_devices; 
			     device_id_in_node++, device_id++, tabidx++)
			{
				item = &(devinfotab[tabidx]);
				item->device_id_in_node = device_id_in_node;
				item->device_id_in_module = device_id;
				item->global_device_id = tabidx + ort->num_local_devices;
				item->node_id = i + 1;
				item->devinfo = devinfos[i][device_id_in_node];
				item->modulename = roff_config.nodes[i].modules[j].name;
				item->alloc_table = roff_alloctab_new(item->global_device_id);
			}
		}
	}

	DBGPRN((stderr, "[remote primary %d] >>> Received all devinfos\n", getpid()));
}

#endif

/* This one is called only from the host, during hm_dev_init().
 * It spawns the workers and returns the merged communicator.
 */
int roff_man_create_workers(void)
{
	int num_nodes;

	DBGPRN((stderr, "[remote primary %d] >>> Creating nodes\n", getpid()));

	if ((num_nodes = roff_man_get_num_nodes()) == 0)
		return 0;

	devinfos = (void ***) smalloc(num_nodes * sizeof(void**));
	Comm_Spawn(comminfo, num_nodes, ROFF_NUM_NODE_WORKER_PROCS);

	/* The primary node receives all devinfos from all workers */
	_primary_receive_all_devinfos();

	return 1;
}

#undef DEBUG_NODE_MANAGER
#endif /* OMPI_REMOTE_OFFLOADING */
