/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#ifndef __VK_DEVINFO_H__
#define __VK_DEVINFO_H__

#include <vulkan/vulkan.h>
#include <pthread.h>
#include "rt_common.h"

/* Number of entries in the per-GPU shader cache (vk_gpu_t.shader_cache) */
#define VK_CACHE_SIZE                    256

/* NOTE(review): presumably the invocation count used when none is given;
 * no use of this constant is visible in this header -- confirm.
 */
#define VK_DEFAULT_WORKGROUP_INVOCATIONS 128

/* Computes (x - c) modulo VK_CACHE_SIZE. VK_CACHE_SIZE is added before the
 * subtraction, so the result lies in [0, VK_CACHE_SIZE) as long as
 * x + VK_CACHE_SIZE >= c. Both arguments are evaluated exactly once.
 */
#define MODSUB(x, c) (((x) + VK_CACHE_SIZE - (c)) % VK_CACHE_SIZE)

/* When defined, vk_gpu_t stores a pthread key (context_key) instead of a
 * plain vk_gpu_ctx_t pointer (context).
 */
#define VK_ENABLE_CONCURRENCY

/* Three unsigned 32-bit components named X, Y and Z. vk_shader_t uses this
 * type for its workgroups and invocations fields.
 */
typedef struct
{
	uint32_t X, Y, Z;
} vk_dimensions_t;

/* Record describing one shader. vk_gpu_t holds VK_CACHE_SIZE of these in its
 * shader_cache array.
 */
typedef struct 
{
	char                 *name;        /* Shader name */
	char                 *filename;    /* Shader filename */
	char                 *sources_filename; /* Shader sources filename */
	void                **args;        /* Shader arguments */
	int                   nargs;       /* Number of shader arguments */
	void                 *(*host_func)(void *); /* Host function */

	/* Vulkan handles kept per shader: module, pipeline layout, pipeline and
	 * the descriptor set layout / pool / set.
	 */
	VkShaderModule        compute_shader_module;
	VkPipelineLayout      pipeline_layout;
	VkPipeline            compute_pipeline;
	VkDescriptorSetLayout descriptor_set_layout;
	VkDescriptorPool      descriptor_pool;
	VkDescriptorSet       descriptor_set;

	/* X/Y/Z triples.
	 * NOTE(review): presumably the workgroup counts and the invocations per
	 * workgroup used for a dispatch -- confirm against the launch code.
	 */
	vk_dimensions_t       workgroups;
	vk_dimensions_t       invocations;

	/* NOTE(review): presumably the loaded shader code and its size in
	 * bytes -- confirm against the loader.
	 */
	char                 *code;
	size_t                code_size;

	VkDescriptorBufferInfo *buffer_infos; /* track allocated memory */
	VkWriteDescriptorSet   *desc_sets;    /* track allocated memory */

	/* Per-shader mutex.
	 * NOTE(review): lock_init presumably records whether `lock` has been
	 * initialized -- confirm.
	 */
	pthread_mutex_t lock;
	bool lock_init;
} vk_shader_t;

/* A Vulkan physical device handle bundled with its properties, features and
 * memory properties structures. Embedded in vk_gpu_t as `pdev`.
 */
typedef struct 
{
	VkPhysicalDevice           physical_device;   /* Physical device handle */
	VkPhysicalDeviceProperties properties;        /* Device properties */
	VkPhysicalDeviceFeatures   features;          /* Device features */
	VkPhysicalDeviceMemoryProperties memory_properties; /* Memory properties */
} vk_gpu_physical_dev_t;

/* The Vulkan objects that make up one context: logical device, compute queue,
 * command pool and command buffer. vk_gpu_t reaches it through a pthread key
 * when VK_ENABLE_CONCURRENCY is defined and through a plain pointer otherwise.
 */
typedef struct 
{
	VkDevice         device;          /* Logical device handle */
	VkQueue          compute_queue;   /* Queue handle */
	VkCommandPool    command_pool;    /* Command pool handle */
	VkCommandBuffer  command_buffer;  /* Command buffer handle */
} vk_gpu_ctx_t;


/* State kept for one GPU: identifiers, status, context access, the physical
 * device description, initial ICVs and the shader cache.
 */
typedef struct 
{
	/* NOTE(review): presumably `id` is the index within this module and
	 * `global_id` the index among all devices -- confirm.
	 */
	int              id;
	int              global_id;
	int              num_launched_shaders;
	devicestatus_e   status;
	
	/* Per-thread context */
#ifdef VK_ENABLE_CONCURRENCY
	pthread_key_t    context_key;   /* Key for thread-specific data */
#else
	vk_gpu_ctx_t    *context;       /* Single context */
#endif
	vk_gpu_physical_dev_t pdev;     /* Physical device and its properties */

	/* NOTE(review): three specialization map entries, presumably one per
	 * X/Y/Z dimension -- confirm.
	 */
	VkSpecializationMapEntry dimensions[3];
	ort_icvs_t       dev_icvs;  /* ICV initial values */
	int              nshaders;  /* Number of cached shaders */
	vk_shader_t      shader_cache[VK_CACHE_SIZE]; /* Shader records */
	int sharedspace;    /* Shared address space with the host (0/1) */
} vk_gpu_t;

/* Pairs a Vulkan buffer handle with a device memory handle.
 * NOTE(review): presumably the object behind the void * addresses handled by
 * vkgpu_alloc()/vkgpu_free() -- confirm.
 */
typedef struct 
{
	VkBuffer buffer;      /* Buffer handle */
	VkDeviceMemory mem;   /* Device memory handle */
} vk_devptr_t;

/* Module-wide state, defined elsewhere.
 * NOTE(review): roles below are inferred from names and types only --
 * confirm against the defining source file.
 */
extern int available_gpus;       /* presumably the number of usable GPUs */
extern VkInstance instance;      /* Vulkan instance handle */
extern vk_gpu_t *vk_gpus;        /* presumably an array of per-GPU records */
extern void *vklock;             /* void *, the type the lock functions take by address */
extern bool _hm_init_called;     /* presumably set once host-module init has run */

/* Pointers to lock functions of the host runtime.
 * Each takes the address of a void * lock object; init_lock additionally
 * takes an integer lock type.
 */
extern void (*init_lock)(void **lock, int type);
extern void (*lock)(void **lock);
extern void (*unlock)(void **lock);

/* Module-wide initialization and finalization.
 * NOTE(review): the effect of prepare_gpus is not visible in this header.
 */
extern void vkgpus_init_all(bool prepare_gpus);
extern void vkgpus_finalize(void);

/* Per-GPU memory and device handling.
 * vkgpu_alloc returns an opaque void * which vkgpu_free accepts back.
 * NOTE(review): the meaning of map_type, and of the return value and output
 * parameters of vkgpu_get_num_gpus, must be confirmed in the implementation.
 */
extern void *vkgpu_alloc(vk_gpu_t *gpu, VkDeviceSize size, int map_type);
extern void vkgpu_free(vk_gpu_t *gpu, void *addr);
extern int  vkgpu_get_num_gpus(vk_gpu_physical_dev_t **devices, VkPhysicalDevice **pdevices, 
                               uint32_t *total_dev_count);
extern void vkgpu_finalize_device(vk_gpu_t *gpu);

/* Shader handling. Shaders are referred to by an integer shader_id.
 * NOTE(review): presumably shader_id is the value returned by
 * vkgpu_new_shader, and teamdims/thrdims carry packed dimensions -- confirm.
 * Note that num_args is passed as a pointer (int *).
 */
extern int  vkgpu_launch_shader(vk_gpu_t *gpu, int shader_id, void *devdata, size_t devdata_size, 
                               int *num_args, void **args, uint32_t num_teams, int num_threads, 
                               unsigned long long teamdims, unsigned long long thrdims, 
                               int thread_limit);
extern void vkgpu_cleanup_shader(vk_gpu_t *gpu, int shader_id);
/* NOTE(review): host_func is declared void * here, whereas vk_shader_t stores
 * host_func as void *(*)(void *); ISO C does not define conversions between
 * object and function pointers, so this relies on platform behavior.
 */
extern int  vkgpu_new_shader(vk_gpu_t *gpu, char *shader_name, char *shader_filename, 
                             char *sources_filename, void *host_func);

/* Transfers between a host address and a device memory handle; both sides
 * take an offset, and a single size applies to the transfer.
 * NOTE(review): size is presumably in bytes -- confirm.
 */
extern void vkgpu_host2dev(vk_gpu_t *gpu, VkDeviceMemory devmem, VkDeviceSize devoffset, 
                           void *hostaddr, size_t hostoffset, size_t size);
extern void vkgpu_dev2host(vk_gpu_t *gpu, VkDeviceMemory devmem, VkDeviceSize devoffset, 
                           void *hostaddr, size_t hostoffset, size_t size);

#endif
