/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "hostpart/oclutils.h"

/* Uncomment to have buildfor() call ocl_show_clprogram_info() after every
 * successful compilation and after linking, not only on compile failures.
 */
// #define DEBUG_CL

/* Directory passed to the OpenCL compiler as an extra include path (-I);
 * it falls back to the parent directory when the build does not define it.
 */
#ifndef OMPI_ROOTDIR
	#define OMPI_ROOTDIR "../"
#endif

/* binfile_prefix and output_dir are set by main() from the command line and
 * used by buildfor() to name the output file "<output_dir>/<binfile_prefix>.<gpuid>".
 * NOTE(review): srcfile is not referenced anywhere in the visible code.
 */
char *srcfile, *binfile_prefix, *output_dir;
/* Array of source file paths to compile; main() fills it with pointers
 * taken directly from argv. */
char **source_filenames;
/* Number of entries in source_filenames */
int num_source_files = 0;
/* Buffer holding the compiler options that buildfor() formats and then
 * hands to clCompileProgram(). */
char buildopts[512];

/* Store exactly `size` bytes from `data` in a newly created (or truncated)
 * file at `path`, created with mode 0644.
 * Returns 0 on success, 9 if the file cannot be created and 10 if the data
 * cannot be written out completely.
 */
static int clprecomp_write_binary(const char *path, const unsigned char *data,
                                  size_t size)
{
	size_t  done = 0;
	ssize_t nw;
	int     fd;

	fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, /* 0644 */
	                S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
	if (fd < 0)
	{
		fprintf(stderr, "Failed to create the output binary file (%s)\n", path);
		return 9;
	}

	/* write() may store fewer bytes than requested; keep going until done */
	while (done < size)
	{
		nw = write(fd, data + done, size - done);
		if (nw <= 0)
			break;
		done += (size_t) nw;
	}

	/* close() is always called; it may also report a deferred write error */
	if (close(fd) != 0 || done != size)
	{
		fprintf(stderr, "Failed to write to the output binary file (%s)\n", path);
		return 10;
	}
	return 0;
}


/* Compile all files listed in source_filenames[] for the given device, link
 * them into a library and store the resulting device binary in the file
 * "<output_dir>/<binfile_prefix>.<gpuid>".
 *
 *   cldev  - the OpenCL device to build for
 *   clplat - the platform the device belongs to
 *   gpuid  - the device id; used in messages and as the output file suffix
 *
 * Returns 0 on success, otherwise a code that identifies the failed step:
 *    1  memory allocation, context creation or build options setup
 *    2  no sources, unreadable source or clCreateProgramWithSource()
 *    3  clCompileProgram()
 *    4  clLinkProgram()
 *    5  querying the number of devices of the linked program
 *    6  the linked program is not associated with exactly 1 device
 *    7  querying the binary size (or the binary is empty)
 *    8  retrieving the binary
 *    9  creating the output file
 *   10  writing the output file
 * All resources acquired here are released on every path.
 */
int buildfor(cl_device_id cldev, cl_platform_id clplat, int gpuid)
{
	cl_context            context = NULL;            // compute context
	cl_program            *programs = NULL;          // one per source file
	cl_program            linked_program = NULL;     // the linked library
	cl_context_properties cprops[3];
	cl_int                err;
	cl_uint               maxcver, numdevs = 0;
	cl_device_fp_config   supportsdouble = 0;
	int                   i, len, ret = 0;
	size_t                binsize = 0;
	unsigned char         *binary = NULL;
	char                  *src = NULL, *outfile = NULL, devname[OCL_QUERY_SIZE];

	if (num_source_files <= 0)
	{
		fprintf(stderr, "No source files given for gpu device %d\n", gpuid);
		return 2;
	}

	/* Zero-filled, so that the cleanup code can tell which programs exist */
	programs = (cl_program *) calloc((size_t) num_source_files,
	                                 sizeof(cl_program));
	if (programs == NULL)
	{
		fprintf(stderr, "Could not allocate memory for programs; exiting.\n");
		return 1;
	}

	/* Get device name and CL version */
	devname[0] = '\0';           /* stays empty if the query below fails */
	maxcver = ocl_openclc_versions(cldev, NULL, NULL);
	clGetDeviceInfo(cldev, CL_DEVICE_NAME, OCL_QUERY_SIZE, devname, NULL);
	fprintf(stderr, ">> Building '%s.%d' for OpenCL %d.%d.%d GPU %d (%s)\n",
	                binfile_prefix, gpuid, VERMAJ(maxcver), VERMIN(maxcver),
	                VERPAT(maxcver), gpuid, devname);
	fflush(stderr);

	/* Get support for double (remains 0, i.e. unsupported, if the query fails) */
	clGetDeviceInfo(cldev, CL_DEVICE_DOUBLE_FP_CONFIG,
	                sizeof(cl_device_fp_config), &(supportsdouble), NULL);

	/* Create a compute context for the given device only */
	cprops[0] = CL_CONTEXT_PLATFORM;
	cprops[1] = (cl_context_properties) clplat;
	cprops[2] = (cl_context_properties) NULL;
	if ((context = clCreateContext(cprops, 1, &cldev, NULL, NULL, NULL)) == NULL)
	{
		fprintf(stderr, "Failed on clCreateContext() for gpu device %d\n", gpuid);
		ret = 1;
		goto cleanup;
	}

	/* Prepare the build arguments */
	len = snprintf(buildopts, sizeof buildopts,
	               "-Werror -I ./ -I ../ -I %s -D__OMPI_CL_KERNEL__ %s %s",
	               OMPI_ROOTDIR,
	               supportsdouble ? "-DOCLC_HAS_DOUBLE" : "",
	               VERMAJ(maxcver) == 2 ?
	                 "-DOCLC_VERSION=2 -cl-std=CL2.0" :
	                 (VERMAJ(maxcver) == 3 ? "-DOCLC_VERSION=3 -cl-std=CL3.0" :
	                   "-DOCLC_VERSION=1"));
	if (len < 0 || (size_t) len >= sizeof buildopts)
	{
		/* Truncated options would be passed to the compiler half-written */
		fprintf(stderr, "Build options too long for gpu device %d\n", gpuid);
		ret = 1;
		goto cleanup;
	}

	for (i = 0; i < num_source_files; i++)
	{
		/* Read the source file */
		if ((src = ocl_read_src(source_filenames[i])) == NULL)
		{
			fprintf(stderr, "Failed to read the sources for gpu device %d\n", gpuid);
			ret = 2;
			goto cleanup;
		}

		/* Create the compute program from the source file */
		programs[i] = clCreateProgramWithSource(context, 1, (const char **) &src,
		                                        NULL, &err);
		if (programs[i] == NULL)
		{
			fprintf(stderr, "Failed on clCreateProgramWithSource() for gpu device %d\n\t%s\n", 
				gpuid, ocl_errstr(err));
			ret = 2;
			goto cleanup;
		}

		/* Compile */
		err = clCompileProgram(programs[i], 1, &cldev, buildopts, 0,
		                       NULL, NULL, NULL, NULL);
		if (err != CL_SUCCESS)
		{
			fprintf(stderr, "Failed on clCompileProgram() for gpu device %d\n>> %s\n-----\n", 
			       gpuid, ocl_errstr(err));
			ocl_show_clprogram_info(programs[i], cldev, devname);
			ret = 3;
			goto cleanup;
		}
#ifdef DEBUG_CL
		ocl_show_clprogram_info(programs[i], cldev, devname);
#endif
		free(src);
		src = NULL;              /* so that cleanup does not free it again */
	}

	/* Link the programs */
	linked_program = clLinkProgram(context, 1, &cldev, "-create-library", 
	                               num_source_files, programs, NULL, NULL, &err);
	if (err != CL_SUCCESS) 
	{
		fprintf(stderr, "Failed on clLinkProgram() for gpu device %d\n>> %s\n-----\n", 
		                gpuid, ocl_errstr(err));
		/* A failed link may still hand back a program object carrying the log */
		if (linked_program != NULL)
			ocl_show_clprogram_info(linked_program, cldev, devname);
		ret = 4;
		goto cleanup;
	}

	/* Extract the number of devices from the binary (should be just 1);
	 * this particular query yields a cl_uint, not a size_t.
	 */
	if (clGetProgramInfo(linked_program, CL_PROGRAM_NUM_DEVICES,
	                     sizeof(cl_uint), &numdevs, NULL) != CL_SUCCESS)
	{
		fprintf(stderr, "Failed on clGetProgramInfo(DEVICES) for gpu device %d\n", gpuid);
		ret = 5;
		goto cleanup;
	}
	if (numdevs != 1)
	{
		fprintf(stderr, "Unexpected error: program should be build for 1 device\n");
		ret = 6;
		goto cleanup;
	}

	/* Now extract the binary size in bytes */
	if (clGetProgramInfo(linked_program, CL_PROGRAM_BINARY_SIZES,
	                     sizeof(size_t), &binsize, NULL) != CL_SUCCESS)
	{
		fprintf(stderr, "Failed on clGetProgramInfo(SIZES) for gpu device %d\n", gpuid);
		ret = 7;
		goto cleanup;
	}
	if (binsize == 0)
	{
		/* Nothing to store; an empty output file would be useless */
		fprintf(stderr, "No binary is available for gpu device %d\n", gpuid);
		ret = 7;
		goto cleanup;
	}

	/* Allocate space and get the binary */
	binary = (unsigned char *) malloc(binsize);
	if (binary == NULL)
	{
		fprintf(stderr, "Could not allocate memory for `binary`; exiting.\n");
		ret = 1;
		goto cleanup;
	}

	/* The query expects an array of buffer pointers, one per device (1 here) */
	if (clGetProgramInfo(linked_program, CL_PROGRAM_BINARIES,
	                     sizeof(unsigned char *), &binary, NULL) != CL_SUCCESS)
	{
		fprintf(stderr, "Failed on clGetProgramInfo(BINARIES) for gpu device %d\n", gpuid);
		ret = 8;
		goto cleanup;
	}
#ifdef DEBUG_CL
	ocl_show_clprogram_info(linked_program, cldev, devname);
#endif

	/* Form the output path; its exact length is measured first so that the
	 * buffer fits any gpuid, no matter how many digits it has.
	 */
	len = snprintf(NULL, 0, "%s/%s.%d", output_dir, binfile_prefix, gpuid);
	if (len >= 0)
		outfile = (char *) malloc((size_t) len + 1);
	if (outfile == NULL)
	{
		fprintf(stderr, "Could not allocate memory for `outfile`; exiting.\n");
		ret = 1;
		goto cleanup;
	}
	snprintf(outfile, (size_t) len + 1, "%s/%s.%d", output_dir, binfile_prefix,
	         gpuid);

	/* Write the binary to file */
	ret = clprecomp_write_binary(outfile, binary, binsize);

cleanup:
	free(outfile);
	free(binary);
	free(src);
	if (linked_program != NULL)
		clReleaseProgram(linked_program);
	for (i = 0; i < num_source_files; i++)
		if (programs[i] != NULL)
			clReleaseProgram(programs[i]);
	free(programs);
	if (context != NULL)
		clReleaseContext(context);

	return ret;
}


/* State shared between precompile() and the per-device callback below */
static int _d_gpuid;   /* the requested device id; negative selects all */
static int _d_err;     /* running sum of the buildfor() return codes */

/* Per-device callback handed to ocl_iterate_gpus() by precompile().
 * Builds for the visited device if it is a requested one; returns 1 right
 * after building for the single requested device (to stop the iteration)
 * and 0 in all other cases.
 */
static int _precompile_cb(int gpuid, cl_device_id cldev, cl_platform_id clplat)
{
	int build_all = (_d_gpuid < 0);

	/* Not a device we were asked about; move on */
	if (!build_all && _d_gpuid != gpuid)
		return 0;

	_d_err += buildfor(cldev, clplat, gpuid);

	/* Only an exact match ends the iteration */
	return (_d_gpuid == gpuid) ? 1 : 0;
}


/* Build the kernels for the device with the given gpuid; if gpuid < 0,
 * build for every device visited by ocl_iterate_gpus() (each output file
 * gets the device id appended).
 * Returns the sum of the buildfor() return codes (0 if all builds
 * succeeded), or -1 if ocl_iterate_gpus() itself reports a negative result.
 */
int precompile(int gpuid)
{
	/* Reset the callback state before walking the devices */
	_d_err = 0;
	_d_gpuid = gpuid;

	if (ocl_iterate_gpus(-1, _precompile_cb) >= 0)
		return _d_err;

	_d_err = -1;
	return -1;
}


/* Print the command-line synopsis on stdout */
static void clprecomp_usage(const char *progname)
{
	printf("Usage: %s [ -all | -<num> ] <output_dir> <cl_bin_file_prefix> <cl_src_files>\n", 
	              progname);
	printf("\twhere option is either '-all' or '-<num>'\n"
	       "\tto build for all available or specific device ids.\n"
	       "\tIf no option is given, build is for the first found device.\n");
}


/* Entry point:
 *   clprecomp [ -all | -<num> ] <output_dir> <cl_bin_file_prefix> <cl_src_files>
 *
 * Parses the optional device selector, records the output directory, the
 * binary file prefix and the list of source files in the file-scope
 * variables and then calls precompile().
 * Returns -1 on a usage error, 1 if memory cannot be allocated, otherwise
 * whatever precompile() returned (0 on success).
 */
int main(int argc, char** argv)
{
	int devid = 0, has_devopt = 0, i;

	if (argc < 4)
	{
		clprecomp_usage(argv[0]);
		return -1;
	}
	
	if (strcmp(argv[1], "-all") == 0)
	{
		devid = -1;              /* negative id: build for all devices */
		has_devopt = 1;
	}
	else
	{
		if (argv[1][0] == '-')
		{
			has_devopt = 1;
			devid = atoi(argv[1]+1);
		}
		if (devid < 0)           /* e.g. "--3"; fall back to the first device */
			devid = 0;
	}
	
	/* When a device option is present, 4 arguments leave no source file */
	num_source_files = argc - has_devopt - 3;
	if (num_source_files < 1)
	{
		clprecomp_usage(argv[0]);
		return -1;
	}
	
	output_dir = argv[has_devopt + 1];
	binfile_prefix = argv[has_devopt + 2];

	/* One pointer (not one char) per source file */
	source_filenames = (char **) malloc((size_t) num_source_files * 
	                                    sizeof(char *));
	if (source_filenames == NULL)
	{
		fprintf(stderr, "Could not allocate memory for sources; exiting.\n");
		return 1;
	}

	for (i = 0; i < num_source_files; i++)
		source_filenames[i] = argv[has_devopt + 3 + i];

	if ((i = precompile(devid)) != 0)
		fprintf(stderr, "[clprecomp]: precompile failed (error: %d).\n", i);

	/* Only the array is owned here; the strings themselves belong to argv */
	free(source_filenames);
	source_filenames = NULL;
	return i;
}
