#!/usr/bin/env bash

# OMPi OpenMP Compiler
# == Copyright since 2001 the OMPi Team
# == Dept. of Computer Science & Engineering, University of Ioannina
# 
# This file is part of OMPi.
# 
# OMPi is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# OMPi is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with OMPi; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

# remote_offload_setup.sh
# Configures and installs OMPi with remote offloading enabled

# ---------------------------------------------------------------------------
# Defaults for everything the command line can change (see handle_args)
# ---------------------------------------------------------------------------

# Boolean switches, stored as the strings "true" / "false"
DEBUG_CONFIG=false
FROM_LAST_NODE=false
CPUS_ONLY=false
BUILD_ONLY=false
PARALLEL_BUILD=false

# Numeric settings
NUMJOBS=1
START_FROM_NODE=1
NUM_WORKER_THREADS=1

# String settings that start out empty
TEMPDIR_OPT=""
ROFFOPTIONS=""

# Locations and file names
SCRIPTDIR=$(cd "$(dirname "$0")" && pwd)
INSTALLDIR="./"
BUILD_FOLDER="./build"
C_PROG_FILENAME="_setup_remote_nodes"

# Number of processing units reported by nproc
MAKEJOBS=$(nproc)

# Argument lists handed to the local and to the remote setup respectively
declare -a LocalArgs=()
declare -a RemoteArgs=()

# Prints the usage text of the script to stdout.
# Reads $0 (script name) and C_PROG_FILENAME (name of the generated C helper).
show_help_screen() {
	# One argument per output line; printf repeats the format for each of them
	printf '%s\n' \
		"This script sets up OMPi with remote offloading support, across the nodes of a cluster (listed in ~/.ompi_remote_devices)" \
		"" \
		"Usage: $0 [options]" \
		"" \
		"Options:" \
		"  --prefix=installdir    set the OMPi installation directory" \
		"  --cpus-only            setup OMPi for CPU-only cluster" \
		"  --build-only --cpus-only run only meson/meson install (for CPU-only)" \
		"  --debug-config         debug ${C_PROG_FILENAME}.c" \
		"  --help                 show this help screen" \
		"" \
		"Advanced options:" \
		"  --jobs=num_jobs        set multiple jobs for OMPi setup" \
		"  --from-node=node_id    setup OMPi starting from a specific node" \
		"  --from-last-node       setup OMPi starting from the last node that failed previously" \
		"  --static-procs         do not use MPI_Comm_spawn; create MPI processes statically (e.g. with mpirun)" \
		"  --ignore-snapshot      do not use configuration snapshot to initialize remote offloading; use homedir config file instead."
}

# Adds one entry ($1) to the comma-separated list kept in ROFFOPTIONS.
append_roffopt() {
	# The "<current list>," prefix is emitted only when the list is non-empty
	ROFFOPTIONS="${ROFFOPTIONS:+${ROFFOPTIONS},}$1"
}

# Parses the command-line options of the script.
#
# Every argument that starts with '-' is split at its first '=' into a name
# and a value. Options known to this script update the matching globals; any
# other option is forwarded as-is to the local setup (LocalArgs) and, with its
# value wrapped in double quotes, to the remote setup (RemoteArgs). Arguments
# that do not start with '-' are ignored.
#
# Globals written: START_FROM_NODE, FROM_LAST_NODE, CPUS_ONLY, BUILD_ONLY,
#                  DEBUG_CONFIG, NUMJOBS, INSTALLDIR, TEMPDIR_OPT,
#                  PARALLEL_BUILD, ROFFOPTIONS (through append_roffopt),
#                  LocalArgs, RemoteArgs
# Exit status:     exits 1 (message on stderr) on an invalid option value or
#                  option combination; exits 0 after printing --help.
handle_args() {
	local arg arg_name arg_value local_arg_str remote_arg_str lowercase_name
	local item name
	local -a kept=()

	for arg in "$@"; do
		[[ $arg == -* ]] || continue

		arg_name="${arg%%=*}"
		arg_value="${arg#*=}"

		# Without an '=' both expansions return the whole argument
		if [[ "$arg_name" == "$arg_value" ]]; then
			local_arg_str="$arg_name"
			remote_arg_str="$arg_name"
		else
			local_arg_str="$arg_name=$arg_value"
			remote_arg_str="$arg_name=\"$arg_value\""
		fi

		lowercase_name=${arg_name,,}

		case $lowercase_name in
			--num-node-workers | --num-worker-threads )
				if [[ "$arg_value" =~ ^[0-9]+$ ]]; then
					if [ "$arg_value" -le 0 ]; then
						echo "error: $lowercase_name can be either 'auto', 'max', or a positive integer" >&2
						exit 1
					fi
				elif [[ "$arg_value" != "max" && "$arg_value" != "auto" ]]; then
					echo "error: $lowercase_name can be either 'auto', 'max', or a positive integer" >&2
					exit 1
				fi
				# Every accepted value is recorded, positive integers included;
				# stripping one dash gives the "-num-...=value" form.
				append_roffopt "${lowercase_name#-}=$arg_value"
				;;
			--from-node )
				if [[ "$arg_value" =~ ^[0-9]+$ ]]; then
					# A value of 0 is silently ignored and the default is kept
					if [ "$arg_value" -gt 0 ]; then
						START_FROM_NODE=$arg_value
					fi
				else
					echo "error: expected integer in --from-node arg" >&2
					exit 1
				fi
				;;
			--from-last-node )
				FROM_LAST_NODE=true
				;;
			--cpus-only )
				CPUS_ONLY=true
				;;
			--static-procs )
				append_roffopt "-use-static-mpi-procs"
				;;
			--ignore-snapshot )
				append_roffopt "-ignore-snapshot"
				;;
			--build-only )
				BUILD_ONLY=true
				;;
			--debug-config )
				DEBUG_CONFIG=true
				;;
			--jobs )
				if [[ "$arg_value" =~ ^[0-9]+$ ]]; then
					# A value of 0 is silently ignored and the default is kept
					if [ "$arg_value" -gt 0 ]; then
						NUMJOBS=$arg_value
					fi
				else
					echo "error: expected integer in --jobs arg" >&2
					exit 1
				fi
				;;
			--help )
				show_help_screen
				exit 0
				;;
			--prefix )
				# A bare "--prefix" without a value is dropped
				if [[ "$local_arg_str" == "$arg_name" ]]; then
					continue
				fi
				LocalArgs+=("$local_arg_str")
				# Whether the remote side keeps it is decided after the loop,
				# once the final value of NUMJOBS is known.
				RemoteArgs+=("$remote_arg_str")
				INSTALLDIR="$arg_value"
				;;
			*)
				LocalArgs+=("$local_arg_str")
				RemoteArgs+=("$remote_arg_str")
				;;
		esac
	done

	if [[ "$NUMJOBS" != "1" ]]; then
		TEMPDIR_OPT="--tempdir=$INSTALLDIR/.tmp"
		PARALLEL_BUILD=true

		# In a parallel build the prefix is not forwarded to the remote nodes
		# (they receive TEMPDIR_OPT instead). Filtering here makes the result
		# independent of whether --jobs came before or after --prefix.
		for item in "${RemoteArgs[@]}"; do
			name=${item%%=*}
			[[ "${name,,}" == "--prefix" ]] || kept+=("$item")
		done
		RemoteArgs=("${kept[@]}")
	fi

	# Options that are always passed to both setups
	LocalArgs+=("-Dremote-offloading=true" "-Droff-options=$ROFFOPTIONS")
	RemoteArgs+=("-Dremote-offloading=true" "-Droff-options=$ROFFOPTIONS")

	if [[ "$BUILD_ONLY" = true && "$CPUS_ONLY" = false ]]; then
		echo "error: '--build-only' should always be used with '--cpus-only'" >&2
		exit 1
	fi
}

# Remote configuration and installation
#
# Generates a small C program ($C_PROG_FILENAME.c) from the heredoc below,
# compiles it with gcc and runs it. The program reads the remote-offloading
# configuration through roff_config_*, picks the nodes that have to be set up
# and, for each of them, runs utilities/remote_offload_local_setup.sh over ssh.
#
# Globals read:    DEBUG_CONFIG, ROFFOPTIONS, BUILD_FOLDER, FROM_LAST_NODE,
#                  RemoteArgs, C_PROG_FILENAME, SCRIPTDIR, INSTALLDIR,
#                  TEMPDIR_OPT, NUMJOBS, HOME
# Globals written: START_FROM_NODE (when resuming from .last_configured_node)
# Arguments:       forwarded unchanged to the generated program
# Exit status:     exits 1 when utilities/remote_offload_create_config.sh
#                  returns 0, or when the generated program cannot be compiled;
#                  exits with the program's status when that is non-zero
#                  (statuses >= 11 are also recorded, minus 10, in
#                  .last_configured_node).
remote_config() {
	local dbg_flag=""
	local create_cfg_exit_code=""
	local last_node=""
	local res
	local ESCAPEDARGS

	if [[ "$DEBUG_CONFIG" = true ]]; then
		dbg_flag="-g"
	fi

	sh ./utilities/remote_offload_create_config.sh
	create_cfg_exit_code=$?

	# A zero status from the helper is treated as "a fresh file was created"
	if [[ $create_cfg_exit_code == "0" ]]; then
		echo ">> No configuration file found but I created one for you at $HOME/.ompi_remote_devices";
		echo ">> Please modify the file accordingly and run this script again."
		exit 1
	fi

	printf '%s' ">> Preparing remote configuration"
	if [[ "$ROFFOPTIONS" != "" ]]; then
		printf '%s' " with options: $ROFFOPTIONS... "
	else
		printf '%s' "... "
	fi

	if [[ -d "$BUILD_FOLDER" ]]; then
		# The subshell keeps the working directory of this script untouched
		# even when the cd fails; ninja errors are deliberately ignored.
		( cd "$BUILD_FOLDER" && ninja uninstall ) > /dev/null 2>&1
		rm -rf -- "$BUILD_FOLDER"
	fi

	echo " done."

	if [[ "$FROM_LAST_NODE" = true && -f "./.last_configured_node" ]]; then
		last_node=$(< ./.last_configured_node)
		# The value is pasted into C source below, so accept positive integers only
		if [[ "$last_node" =~ ^[0-9]+$ ]] && [ "$last_node" -gt 0 ]; then
			START_FROM_NODE=$last_node
		fi
	fi

	# Double quotes are escaped because the arguments end up inside a C string literal
	ESCAPEDARGS=$(echo "${RemoteArgs[@]}" | sed 's/"/\\"/g')

	# C code for configuring remote nodes with remote_offload_local_setup script
	# (unquoted heredoc: shell variables are expanded while the file is written)
	cat > "$C_PROG_FILENAME.c" << EOF
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "./common/assorted.h"
#include "./common/str.h"
#include "./common/roff_config.h"

int *required_node_indices;
int num_required = 0;
int *done_module;

int ssh_check(int node_id)
{
	roff_config_node_t *node = &(roff_config.nodes[node_id]);
	int i, j, ssh_flag = 0,
	    totaldevs = node->num_modules - node->has_cpu_module;

	for (i = 0; i < totaldevs; ++i)
		for (j = 0; j < roff_config.nuniquemodules; ++j)
			if (!strcmp(roff_config.uniquemodnames[j],node->module_names[i]))
				if (!done_module[j])
					done_module[j] = ++ssh_flag;

	return ssh_flag;
}

void find_required_nodes()
{
	int i;
	done_module = calloc(roff_config.nuniquemodules, sizeof(int));
	required_node_indices = malloc(roff_config.num_nodes*sizeof(int));

	for (i=0; i<roff_config.num_nodes; ++i){
		if (strcmp(roff_config.nodes[i].name, "localhost") && ssh_check(i))
			required_node_indices[num_required++] = i;
	}
}

void setup(int node_index)
{
    char *module_list = NULL;
    int exit_code;
    str remote_command = Strnew();
	int node_id = required_node_indices[node_index];

    printf("---\n");
    printf("(%d/%d) Attempting to configure %s...\n", node_index+1, num_required, roff_config.nodes[node_id].name);
    if (!strcmp(roff_config.nodes[node_id].name, "localhost"))
    {
        printf("\twarning: Skipping localhost. The local machine is configured in the end.\n");
        return;
    } 
    else if (roff_config.nodes[node_id].num_modules <= 0) 
    {
        NOMODULES:
        printf("\twarning: %s seems to have no modules. It won't be configured.\n", 
                roff_config.nodes[node_id].name);
        return;
    }
    fflush(stdout);

    module_list = roff_config_get_node_modulestr(roff_config.nodes[node_id].name);
    if (module_list == NULL)
        goto NOMODULES;

    str_printf(remote_command, "ssh %s 'cd \"$SCRIPTDIR\"; INSTALLDIR=%s "
            "bash -s -- < ./utilities/remote_offload_local_setup.sh $TEMPDIR_OPT --hostname=\"%s\" --modules=\"%s\" %s' > %s/.install_info/install_info.%s",
            roff_config.nodes[node_id].name,
            "$INSTALLDIR",
            roff_config.nodes[node_id].name,
            module_list, 
            "$ESCAPEDARGS",
            "$INSTALLDIR",
             roff_config.nodes[node_id].name);
    
	exit_code = system(str_string(remote_command));
    if (exit_code)
    {
        printf("Configuration failed for %s (exit code: %d)\nAborting...\n", 
                roff_config.nodes[node_id].name, exit_code/256);
        roff_config_finalize();
        free(module_list);
        str_free(remote_command);
        /* Report the 1-based position in required_node_indices: that is the
           index space main() uses when it resumes from a given node. */
        exit(10 + node_index + 1);
    }

    if (module_list)
        free(module_list);

    str_free(remote_command);
}
    
int main(int argc, char* argv[]) {
    bool debug = ${DEBUG_CONFIG};
    int i, j, exit_code;
    int firstnode = ${START_FROM_NODE};
    int numjobs = ${NUMJOBS};
	str add_ssh_hosts_command = Str("bash -s -- < ./utilities/add_ssh_hosts.sh ");

    printf(">> Starting remote configuration.\n");
    roff_config_initialize(IGNORE_DISABLED_MODULES, false);
    
    printf(">> Parser found %d node(s) in .ompi_remote_devices\n", roff_config.num_nodes);
	if (roff_config.num_nodes == 0)
	{
		fprintf(stderr, "Empty configuration file; aborting...\n");
		roff_config_finalize();
		return 1;
	}

    if (firstnode > roff_config.num_nodes || numjobs > 1)
        firstnode = 1;

	find_required_nodes();

	if (roff_config_should_print_ssh_hosts())
	{
		printf(">> Please add the following lines to your SSH config file ($HOME/.ssh/config):\n\n");
		roff_config_print_ssh_hosts();
	}

	if (num_required > 0)
	{
		int chst = 0;
		printf("%d node(s) will be set up to cover all modules\n", num_required);
		if (firstnode == 1)
		{
			JOB_START(setupnode, ${NUMJOBS}, num_required)
			{
				JOB_LOOP(setupnode, node_id)
					setup(node_id);
			}
			chst = JOB_FINISH(setupnode, 0);
		}
		else
			for (i = firstnode - 1; i < num_required; ++i)
				setup(i);
	}
   
    roff_config_finalize();
	str_free(add_ssh_hosts_command);

    printf("Remote configuration completed.\n");
    return 0;
}
EOF

	# Build the program created above. Stop right away on a compile error:
	# running a missing binary would yield status 127, which would otherwise
	# be mistaken for a failed node and written to .last_configured_node.
	# $dbg_flag is left unquoted on purpose so that an empty value vanishes.
	if ! gcc $dbg_flag -DREMOTE_OFFLOADING_SETUP_MODE common/str.c common/assorted.c common/roff_config.c "$C_PROG_FILENAME.c" -o "$C_PROG_FILENAME"; then
		echo "error: could not compile $C_PROG_FILENAME.c" >&2
		exit 1
	fi

	# Run the program and remove its files, when not debugging
	"./$C_PROG_FILENAME" "$@"
	res=$?
	if [[ "$DEBUG_CONFIG" = false ]]; then
		rm -f -- "$C_PROG_FILENAME.c" "$C_PROG_FILENAME"
	fi

	if [[ "$res" != "0" ]]; then
		# Setup failed for a specific node
		if [[ "$res" -ge "11" ]]; then
			echo $((res - 10)) > .last_configured_node
		fi
		exit "$res"
	fi
}


# Local configuration and installation
#
# Runs utilities/remote_offload_local_setup.sh for the local machine with the
# arguments collected in LocalArgs plus "--primary".
#  - Normal mode: the script's stdout is written to
#    ${INSTALLDIR}/.install_info/install_info.local
#  - Build-only mode (BUILD_ONLY=true): "--build-only" is added and nothing is
#    written to a file; stderr is merged into stdout instead.
#
# Globals read: INSTALLDIR, BUILD_ONLY, LocalArgs
# Returns:      the exit status of the setup script (or of the redirection)
local_config() {
	local info_file="${INSTALLDIR}/.install_info/install_info.local"
	local -a flags=(--primary)

	echo ">> Starting local configuration."

	if [[ "$BUILD_ONLY" = true ]]; then
		flags+=(--build-only)
		# The redirection has to be written literally: an operator stored in a
		# variable is not re-parsed and would be used as a file name.
		INSTALLDIR=$INSTALLDIR ./utilities/remote_offload_local_setup.sh "${LocalArgs[@]}" "${flags[@]}" 2>&1
	else
		INSTALLDIR=$INSTALLDIR ./utilities/remote_offload_local_setup.sh "${LocalArgs[@]}" "${flags[@]}" > "$info_file"
	fi
}

# Copies the device libraries found under ${INSTALLDIR}/.tmp/<dir>/ into the
# root installation directory.
#
# For every entry <dir> of ${INSTALLDIR}/.tmp, the contents of
# <dir>/lib/ompi/devices and <dir>/lib64/ompi/devices are copied recursively to
# ${INSTALLDIR}/lib/ompi/devices and ${INSTALLDIR}/lib64/ompi/devices.
# Missing or empty source directories are skipped; a missing destination
# directory is created first.
#
# Globals read: INSTALLDIR
copy_all_libs() {
	local root_dir="${INSTALLDIR}/.tmp"
	local dir libdir
	local -a entries

	printf ">> Copying all node libraries to root installation directory..."
	for dir in "$root_dir"/*; do
		for libdir in "lib/ompi/devices" "lib64/ompi/devices"; do
			[[ -d "$dir/$libdir" ]] || continue

			# With nothing to match, the glob stays literal and names no file
			entries=("$dir/$libdir"/*)
			[[ -e "${entries[0]}" || -L "${entries[0]}" ]] || continue

			# Without an existing target directory cp would fail, or rename a
			# single source directory instead of copying it inside
			mkdir -p -- "${INSTALLDIR}/${libdir}" || continue
			cp -R -- "${entries[@]}" "${INSTALLDIR}/${libdir}/"
		done
	done
	printf " done.\n"
}

handle_args "$@"

# Enter the script directory before touching INSTALLDIR: everything below
# runs from there, so a relative installation path (including the default
# "./") has to be resolved against the same directory when it is created.
cd "${SCRIPTDIR}" || { echo "error: cannot enter ${SCRIPTDIR}" >&2; exit 1; }

# The per-node installation logs are written into this directory
mkdir -p "${INSTALLDIR}/.install_info" || exit 1

if [[ "$CPUS_ONLY" = false ]]; then
	remote_config
fi

local_config

if [[ "$PARALLEL_BUILD" = true ]]; then
	copy_all_libs
fi

# Clean up the bookkeeping files left in the script directory
rm -rf ./.ompi_configured_modules_*
rm -f ./.last_configured_node

echo "Installation completed."