/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

// #define DBGPRN_FORCE 
// #define DBGPRN_BLOCK
#define DBGPRN_FILTER DBG_ROFF_MEMORY

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include "targdenv.h"
#include "ort_prive.h"
#include "ort.h"

#ifdef OMPI_REMOTE_OFFLOADING
#include "roff_config.h"
#include "remote/memory.h"
#include "remote/roff_prive.h"
#include "remote/node_manager.h"
#include "remote/workercmds.h"
#include "assorted.h"

/* Growth step, in items: check_space() enlarges a table by at least this many entries. */
#define MEM_ITEMS_CAPACITY 64

/* roff_alloc_tables is a dynamically allocated array (one table per device,
 * created in roff_alloctab_init()) that holds all allocated memory. Only
 * worker processes need to truly allocate memory. The primary node just
 * registers (i.e. marks an index as used). This is done so that the primary
 * node and the worker nodes can know which indexes are used without needing
 * to communicate with each other. The index of an element is its mediary
 * address, while its value is a struct that holds its actual address. At the
 * **BEGINNING** of each table reside the variables that exist in an
 * #omp declare target link clause (if any). For these global variables we
 * don't allocate new space; we just store their real address.
 */
roff_alloctab_t **roff_alloc_tables; /* used only by workers */
static int num_devices;            /* Number of devices (and elements for each array); set by roff_alloctab_init() */

/* Guarantee that position `index` exists in the table's item array.
 *
 * If `index` is already below the current capacity nothing happens.
 * Otherwise the array is enlarged to
 *     MAX(index + 1, capacity + MEM_ITEMS_CAPACITY)
 * items, the freshly added items are zeroed (so they read as unused) and
 * the recorded capacity is updated. If realloc() fails, the process
 * terminates.
 */
static
void check_space(roff_alloctab_t *table, int index)
{
	int old_capacity = table->capacity;
	int grown;

	if (index < old_capacity)
		return;     /* already large enough */

	/* Grow by one step, or more if the requested index is even further away */
	grown = old_capacity + MEM_ITEMS_CAPACITY;
	if (index >= grown)
		grown = index + 1;

	table->mem = realloc(table->mem, grown * sizeof(roff_allocitem_t));
	if (table->mem == NULL)
	{
		perror("check_space()");
		exit(EXIT_FAILURE);
	}

	/* Only the tail that was just added needs clearing */
	memset(&table->mem[old_capacity], 0,
	       (grown - old_capacity) * sizeof(roff_allocitem_t));
	table->capacity = grown;
}


/* Recompute table->next_available_index.
 *
 * Starting at `start_address`, skip over every slot whose data pointer is
 * non-NULL (i.e. in use) and stop at the first free slot or at the end of
 * the array, whichever comes first. The resulting position is stored in
 * the table and check_space() is called on it, so that it is always a
 * valid, allocated position.
 */
static
void update_available_index(roff_alloctab_t *table, int start_address)
{
	int pos;

	for (pos = start_address; pos < table->capacity; pos++)
		if (!table->mem[pos].data)
			break;      /* found an unused slot */

	table->next_available_index = pos;
	check_space(table, pos);
}


/* Record a global variable in the table: slot `maddr` is made to point to
 * `medaddr` (no memory is allocated).
 * NOTE: the table is not resized here; the slot must already exist
 * (presumably guaranteed by an earlier roff_alloctab_init_global_vars()
 * call -- verify against callers).
 */
void roff_alloctab_add_global(roff_alloctab_t *table, roff_datatype_t maddr, void *medaddr)
{
	roff_allocitem_t *slot = &(table->mem[maddr]);

	slot->data = medaddr;

	DBGPRN((stderr, "ADDED GLOBAL maddr %d hostvar %p to dev %d\n", maddr, medaddr, table->device_id));
}


/* Helper function: occupy slot `maddr` of the given table.
 *
 * The table is grown first (check_space()) if `maddr` lies beyond its
 * current capacity. Then:
 *   - do_alloc != 0: `size` bytes are obtained through DEVICE_ALLOC() on the
 *     table's device and the resulting pointer is stored in the slot (the
 *     result is stored as-is, without checking it);
 *   - do_alloc == 0: nothing is allocated; a non-NULL placeholder is stored
 *     so that the slot counts as used.
 *
 * Returns the pointer stored in the slot.
 * NOTE: Size is ignored when calling with do_alloc = 0.
 */
static
void *_tab_add(roff_alloctab_t *table, roff_datatype_t maddr, size_t size, int map_memory, 
               int do_alloc)
{
	roff_allocitem_t *item;
	ort_device_t *d = &(ort->ort_devices[table->device_id]);

	check_space(table, maddr);
	item = &(table->mem[maddr]);

	// TODO: Change MAP_TYPE_IGNORE
	if (do_alloc)
		item->data = DEVICE_ALLOC(d, size, map_memory, NO_HOSTADDR, MAP_TYPE_IGNORE);
	else
		item->data = (void *)0x999;   /* placeholder: any non-NULL value marks the slot as used */

	/* size is a size_t, so it must be printed with %zu; maddr is converted
	 * explicitly so that it always matches its %d specifier.
	 */
	DBGPRN((stderr, "%s maddr %d%s%s of size %zu to %p to dev %d\n",
	    (do_alloc) ? "ADDED" : "REGISTERED", (int) maddr,
	    map_memory == MAPPED_DEVDATA ? " (devdata)" : "",
	    map_memory == MAPPED_DECLDATA ? " (decldata)" : "",
	    size, item->data, table->device_id
	));

	return item->data;
}


/* Helper function: release slot `maddr` of the given table.
 *
 * Requests are ignored when `maddr` refers to a global variable (it lies
 * below table->global_vars_index) or when it lies beyond the table's
 * capacity (such a slot does not exist, so touching it would access memory
 * outside the item array).
 *
 * Otherwise, if do_free != 0 the stored pointer is handed to DEVICE_FREE()
 * on the table's device. In all cases the slot is then marked as unused
 * (NULL) and next_available_index is lowered to `maddr` if it was higher, so
 * that the slot is reused first.
 */
static
void _tab_remove(roff_alloctab_t *table, roff_datatype_t maddr, int unmap_memory, int do_free)
{
	roff_allocitem_t *item;
	ort_device_t *dev = &(ort->ort_devices[table->device_id]);
	
	if (maddr < table->global_vars_index) return; /* cannot free a global variable */

	if ((int) maddr >= table->capacity)           /* same range test as roff_alloctab_get() */
	{
		DBGPRN((stderr, "_tab_remove maddr %d is bigger than %d on dev %d; ignored\n",
		        (int) maddr, table->capacity, table->device_id));
		return;
	}
	
	item = &(table->mem[maddr]);

	if (do_free)
		DEVICE_FREE(dev, item->data, unmap_memory);
	item->data = NULL;

	/* next_available_index = MIN(next_available_index, maddr) */
	table->next_available_index = (table->next_available_index < maddr) ?
		table->next_available_index : maddr;

	DBGPRN((stderr, "%s maddr %d%s%s, next_available_index is %d on dev %d\n",
	    (do_free) ? "DELETED" : "UNREGISTERED", (int) maddr,
	    unmap_memory == MAPPED_DEVDATA ? " (devdata)" : "",
	    unmap_memory == MAPPED_DECLDATA ? " (decldata)" : "",
	    table->next_available_index, dev->idx
	));
}


/*
 * Creates and returns a fresh, empty allocation table for the given device.
 *
 * The new table has zero capacity (it grows on first use), no reserved
 * global-variable slots and next_available_index set to -1, which marks it
 * as "not yet initialized" for roff_alloctab_register().
 * NOTE: roff_alloctab_init() uses this to build the per-device tables of a
 * worker; any other callers are outside this file.
 */
roff_alloctab_t *roff_alloctab_new(int device_id)
{
	roff_alloctab_t *tab = smalloc(sizeof(roff_alloctab_t));

	tab->device_id = device_id;
	tab->capacity = 0;
	tab->global_vars_index = 0;
	tab->next_available_index = -1;
	tab->mem = scalloc(1, sizeof(roff_allocitem_t));

	return tab;
}


/* Initialize this module: remember the number of devices and create one
 * empty allocation table per device in roff_alloc_tables.
 * NOTE: This function should be called only once by the workers, before
 * any other function in this file is called.
 */
void roff_alloctab_init(int number_of_devices)
{
	int dev = 0;

	num_devices = number_of_devices;
	roff_alloc_tables = scalloc(num_devices, sizeof(roff_alloctab_t*));

	while (dev < number_of_devices)
	{
		roff_alloc_tables[dev] = roff_alloctab_new(dev);
		dev++;
	}
}


/* Reserve the leading slots of a device's table for the global variables
 * that exist in #omp declare target link clauses.
 *
 * The table is selected according to node_role: the primary node uses the
 * table stored in the device's devinfo entry, every other node uses
 * roff_alloc_tables[device_id]. The number of global variables is taken
 * from the device's global environment (used_entries); the table is grown to
 * hold them, global_vars_index is set to used_entries + 1 and dynamic
 * entries start from that position.
 * NOTE that this function should be called only once and before any other
 * add function is called.
 */
void roff_alloctab_init_global_vars(int device_id)
{
	roff_alloctab_t *tab;
	tdenv_t genv;

	if (node_role != ROLE_PRIMARY)
		tab = roff_alloc_tables[device_id];
	else
		tab = devinfotab_get_from_gldevid(device_id)->alloc_table;

	genv = tdenv_global_env(device_id);

	/* Make room for the globals, then fence them off from dynamic entries */
	check_space(tab, genv->used_entries);
	tab->global_vars_index = genv->used_entries + 1; /* # global vars + 1 */
	tab->next_available_index = tab->global_vars_index;
}


/* Register (but do not allocate memory for) a variable and return its
 * mediary address.
 *
 * The address handed out is the table's next available index. A table that
 * was never used (next_available_index == -1) is first given its initial
 * storage and then starts handing out addresses from 1, since 0 is kept
 * free to stand for the NULL address. After the slot is marked as used, the
 * next available index is advanced to the first free slot after it.
 * NOTE: Should be used only by the primary node.
 */
roff_datatype_t roff_alloctab_register(roff_alloctab_t *table, int map_memory)
{
	roff_datatype_t maddr = table->next_available_index;

	if (maddr == -1) /* first time initialization */
	{
		/* This call is only needed for its side effect of allocating the
		 * table's storage; the index it computes is overridden right below.
		 */
		update_available_index(table, FIRST_ADDRESS);
		table->next_available_index = 1; /* leave 0 empty for NULL address */
		maddr = 1;
	}

	_tab_add(table, maddr, SIZE_IGNORE, map_memory, DONT_ALLOC);
	update_available_index(table, maddr + 1);

	return maddr;
}


/* Unregister (but do not deallocate memory for) a variable: the slot at
 * mediary address `maddr` is marked as unused, without calling the device's
 * free routine. Requests for addresses below the table's global_vars_index
 * are ignored (global variables cannot be removed).
 * NOTE: Should be used only by the primary node.
 */
void roff_alloctab_unregister(roff_alloctab_t *table, roff_datatype_t maddr, int unmap_memory)
{
	_tab_remove(table, maddr, unmap_memory, DONT_FREE);
}


/* Allocate memory for the given size and mediary address. Return a pointer
 * to the allocated memory. The allocation is done on the table's device and
 * the pointer is also stored in slot `maddr` of the table (which is grown
 * if needed).
 * NOTE: Should be used by workers.
 */
void *roff_alloctab_add(roff_alloctab_t *table, roff_datatype_t maddr, size_t size, int map_memory)
{
	return _tab_add(table, maddr, size, map_memory, DO_ALLOC);
}


/* Deallocate memory with the given mediary address: the pointer stored in
 * slot `maddr` is released through the device's free routine and the slot
 * is marked as unused. Requests for addresses below the table's
 * global_vars_index are ignored (global variables cannot be freed).
 * NOTE: Should be used by workers.
 */
void roff_alloctab_remove(roff_alloctab_t *table, roff_datatype_t maddr, int unmap_memory)
{
	_tab_remove(table, maddr, unmap_memory, DO_FREE);
}


/* Return the real address stored for the given mediary address (NULL if the
 * slot is unused).
 *
 * The mediary address must be a valid position of the table, i.e. within
 * [0, capacity). Anything else (including a value that turns negative when
 * converted to int) would index outside the item array, so the process is
 * aborted instead.
 */
void *roff_alloctab_get(roff_alloctab_t *table, roff_datatype_t maddr)
{
	int index = (int) maddr;

	if (index < 0 || index >= table->capacity)
	{
		DBGPRN((stderr, "roff_alloctab_get maddr %d is out of range [0, %d) on dev %d\n",
				index, table->capacity, table->device_id));
		abort();
	}
	return table->mem[index].data;
}


/* Reverse lookup: return the index (mediary address) of the first slot whose
 * stored pointer equals `addr`, or -1 if no slot of the table holds it.
 * The whole table is scanned linearly, starting from position 0.
 */
int roff_alloctab_get_item(roff_alloctab_t *table, void *addr)
{
	int idx = 0;

	while (idx < table->capacity)
	{
		if (addr == table->mem[idx].data)
			return idx;
		idx++;
	}

	return -1;
}


/* Free all allocated memory. Should be called once when finalizing.
 * NOTE: No other functions of this file should be called after that. */
void roff_alloctab_free_all(void)
{
	int i, device_id, total = 0;
	int start = 0, end;
	roff_alloctab_t *table;
	ort_device_t *d = NULL;

	if (node_role == ROLE_PRIMARY)
		end = devinfotab_size; /* 0 .. #remotedevs */
	else
		end = ort->num_devices; /* 0 .. #localdevs */

	for (device_id = start; device_id < end; device_id++)
	{
		if (node_role == ROLE_PRIMARY)
			table = devinfotab[device_id].alloc_table;
		else
		{
			table = roff_alloc_tables[device_id];
			d = &(ort->ort_devices[table->device_id]);
		}

		/* Cannot free a global variable (because it was never malloced) */
		for (i = table->global_vars_index; i < table->capacity; ++i)
		{
			if (table->mem[i].data)
			{
				if ((node_role == ROLE_WORKER) && d) /* only worker does alloc */
				{
					DEVICE_FREE(d, table->mem[i].data, 0); 
					++total;
				} 
			}
		}

		free(table->mem);
		free(table);
	}

	if (node_role == ROLE_WORKER)
		free(roff_alloc_tables);

	DBGPRN((stderr, "[remote %d] CALLED roff_alloctab_free_all, freed %d items\n", 
	                getpid(), total));
}

#undef DEBUG_MEMORY
#endif /* OMPI_REMOTE_OFFLOADING */
