/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* x_for.c */

/*
 * 2023/03
 *   Refactored code substantially
 * 2020/01/15
 *   Added not-equal conditional operator as per OpenMP 5.0
 * 2019/10/27
 *   ort_num_iters() not needed any more; #iterations calculated directly
 *   in the output code.
 * 2015/04/09
 *   changed ort_num_iters from variadic
 * 2011/03/17:
 *   _ort_leaving_for() not needed for parallel for constructs.
 * 2010/12/21:
 *   bug fix to avoid wrong test for lastprivate iteration.
 * 2010/11/20:
 *   bug fixes in: runtime schedule code, a decreasing step case,
 *                 lastprivate code for collapse()d loops
 * 2010/11/06:
 *   major changes; support for COLLAPSE().
 * 2009/05/11:
 *   added AUTO schedule type - implemented as STATIC for now.
 */


#include <string.h>
#include <assert.h>
#include "stddefs.h"
#include "x_for.h"
#include "x_clauses.h"
#include "x_reduction.h"
#include "ast_xform.h"
#include "ast_free.h"
#include "ast_copy.h"
#include "ast_print.h"
#include "ast_assorted.h"
#include "ast_arith.h"
#include "ast_types.h"
#include "str.h"
#include "ompi.h"
#include "ox_xform.h"


/**
 * Map a schedule-variable kind to the name used for it in the generated code.
 *
 * @param svt  the kind of variable (LOOP_PREFIX, LOOP_ITER, LOOP_NITERS,
 *             LOOP_FITER or LOOP_LITER)
 * @return     the corresponding string literal; for any other value
 *             exit_error() is called and, should it return, NULL is returned.
 */
char *for_varnames(schedvartype_e svt)
{
	char *name = (char *) NULL;

	if (svt == LOOP_PREFIX)
		name = "";
	else if (svt == LOOP_ITER)
		name = "iter_";
	else if (svt == LOOP_NITERS)
		name = "niters_";
	else if (svt == LOOP_FITER)
		name = "fiter_";
	else if (svt == LOOP_LITER)
		name = "liter_";
	else
		exit_error(1, "[for_varnames]: unknown variable type (%d)", svt);

	return name;
}


/**
 * Affinity schedule code from old times; probably needs repairs.
 *
 * Wraps the current main part in
 *   if (ort_affine_iteration(<schedchunk>)) <mainpart>
 * When lastprivate variables exist, the iteration counters are declared and
 * the counter is incremented at the start of the body.
 *
 * @param loopinfo  the loop information (haslast, ordplain, schedchunk and
 *                  varname are used)
 * @param code      the code parts; decls and mainpart are updated in place
 */
static void affinity_schedule(fordata_t *loopinfo, foresult_t *code)
{
	aststmt s;
	
	if (loopinfo->haslast)
	{
		/* We need something else for the lastprivate vars */
		s = Declaration(    /* declare: <specs> niters_,iter_=1; */
		      ITERCNT_SPECS,
		      DeclList(
		        Declarator(NULL, 
		                   IdentifierDecl(Symbol(loopinfo->varname(LOOP_NITERS)))),
		        InitDecl(
		          Declarator(NULL, 
		                     IdentifierDecl(Symbol(loopinfo->varname(LOOP_ITER)))),
		          OneExpr()
		        )
		      )
		    );
		code->decls = Block2(code->decls, s);
		
		/* Same loop, new body: count the iterations. The counter must be the
		 * very variable declared above, hence the name comes from varname()
		 * and is not hard-coded.
		 */
		code->mainpart = BlockList(   /* iter_++ */
		                   Expression(
		                     PostOperator(
		                       IdentName(loopinfo->varname(LOOP_ITER)), 
		                       UOP_inc
		                     )
		                   ),
		                   code->mainpart
		                 );
	}
	
	/* A multi-statement body must be braced before it becomes an if-branch */
	if (loopinfo->haslast || loopinfo->ordplain)
		code->mainpart = Compound(code->mainpart);
	code->mainpart = 
		If(
		  FunctionCall(IdentName("ort_affine_iteration"), loopinfo->schedchunk),
		  code->mainpart,
		  NULL
		);
}


/**
 * Produce standard declarations in all split-iterations schedules.
 * Always non-NULL.
 *
 * The returned statement (list) contains, in this order:
 *   - the iteration counters, named through loopinfo->varname():
 *       <ITERCNT_SPECS> niters = 0, iter = 0, fiter, liter = 0;
 *   - if collapsenum > 1:  <ITERCNT_SPECS> pp_ = 1;
 *   - if collapsenum > 1 or doacrossnum > 0: one declaration per loop of the
 *     nest (max(collapsenum, doacrossnum) loops), declaring
 *     loopinfo->itersym[i] initialized with loop_iters(&loopinfo->forps[i]);
 *   - if doacrossnum > 0:  long DOACCPARAMS[][3] = { ... }; with one
 *     LongArray3Initer() element per doacross loop.
 *
 * @param loopinfo  the loop nest information
 * @return          the declarations
 */
aststmt for_iterdecls(fordata_t *loopinfo)
{
	aststmt s;
	int i;
	
	/* niters, fiter, liter */
	s = Declaration(/* Initialize because if a thread gets no iterations, the */
	      ITERCNT_SPECS,  /* lastprivate check for iter==niters may succeed! */
	      DeclList(       /*  <specs> niters_=0,iter_=0,fiter_,liter_=0; */
	        DeclList(
	          DeclList(
	            InitDecl(
	              Declarator(NULL, 
	                      IdentifierDecl(Symbol(loopinfo->varname(LOOP_NITERS)))),
	              ZeroExpr()
	            ),
	            InitDecl(
	              Declarator(NULL, 
	                        IdentifierDecl(Symbol(loopinfo->varname(LOOP_ITER)))),
	              ZeroExpr()
	            )
	          ),
	          Declarator(NULL, 
	                     IdentifierDecl(Symbol(loopinfo->varname(LOOP_FITER))))
	        ),
	        InitDecl(
	          Declarator(NULL, 
	                     IdentifierDecl(Symbol(loopinfo->varname(LOOP_LITER)))),
	          ZeroExpr()
	        )
	      )
	    );

	/* We need vars for # iterations in complex loops */
	if (loopinfo->collapsenum > 1 || loopinfo->doacrossnum > 0)
	{
		int nestnum;
	
		/* pp_ is the running product used when recovering collapsed indices */
		if (loopinfo->collapsenum > 1)
			s = BlockList(
			      s,
			      Declaration(ITERCNT_SPECS,
			                  InitDecl(
			                    Declarator(NULL, IdentifierDecl(Symbol("pp_"))),
			                    OneExpr()
			                  ))
			    );
			       
		/* One iteration counter per loop of the (deeper of the two) nests */
		nestnum = (loopinfo->doacrossnum > loopinfo->collapsenum) ? 
		          loopinfo->doacrossnum : loopinfo->collapsenum;
		for (i = 0; i < nestnum; i++)
			s = BlockList(
			      s,
			      Declaration(
			        ITERCNT_SPECS,
			        InitDecl(
			          Declarator(NULL, IdentifierDecl(loopinfo->itersym[i])),
			          loop_iters(&loopinfo->forps[i])
			        )
			      )
			    );
	}

	/* Finally, we need the loop parameters for doacross loops */
	if (loopinfo->doacrossnum > 0)
	{
		astexpr elems;
		
		/* Form the initializer */
		elems = LongArray3Initer(loopinfo->forps[0].lb, loopinfo->forps[0].step,
		               loopinfo->forps[0].incrop, Identifier(loopinfo->itersym[0]));
		for (i = 1; i < loopinfo->doacrossnum; i++)
			elems = 
				CommaList(
					elems, 
					LongArray3Initer(loopinfo->forps[i].lb, loopinfo->forps[i].step, 
					          loopinfo->forps[i].incrop, Identifier(loopinfo->itersym[i]))
				);
		/* Declare and initialize _doacc_params_[][3] */
		s = BlockList(
				s, 
				Declaration(
					Declspec(SPEC_long),
					InitDecl(
						Declarator(
							NULL,
							ArrayDecl(
								ArrayDecl(IdentifierDecl(Symbol(DOACCPARAMS)),NULL,NULL),
								NULL,
								Constant("3")
							)
						),
						BracedInitializer(elems)
					)
				)
			);
	}
	
	return s;
}


/**
 * @brief Produce the main, normalized loop body
 * 
 * A single loop becomes:
 *   for (iter = fiter; iter < liter; iter++) {
 *     <var> = lb +/- iter*step
 *     <body>
 *   }
 * optimized as:
 *   for (iter = fiter, var = ...; iter < liter; iter++, var +/-= step) {
 *     <body>
 *   }
 * If there is an ordered clause, we insert "_ort_for_curriter(iter_)"
 * just before the body, to let the runtime know our current iteration.
 *
 * For a collapsed loop nest, the non-optimized version is output
 * and multiple <var>s are recovered: starting from the innermost loop,
 *   <var_i> = lb_i +/- step_i * ((iter / pp_) % iters_i);  pp_ *= iters_i;
 * with pp_ reset to 1 at the top of every iteration.
 *
 * @param loopinfo  the loop nest information
 * @param origbody  the body of the original (innermost) loop
 * @return          the statement produced by loop_normalize()
 */
static aststmt for_std_mainpart(fordata_t *loopinfo, aststmt origbody)
{
	aststmt recover, assign, curriter = NULL;
	int lvl;

	/* Only plain ordered loops must report the current iteration */
	if (loopinfo->ordplain)
		curriter = Expression(
		             FunctionCall(IdentName("_ort_for_curriter"),
		                          IdentName(loopinfo->varname(LOOP_ITER)))
		           );

	/* Single loop: optimize original loop index recovery */
	if (loopinfo->collapsenum <= 1)
	{
		symbol var = loopinfo->forps[0].var;

		return
			loop_normalize(
			  Symbol(loopinfo->varname(LOOP_ITER)), 
			  IdentName(loopinfo->varname(LOOP_FITER)), 
			  Assignment(              /* var = lb +/- fiter*step */
			    Identifier(var),
			    ASS_eq,
			    BinaryOperator(
			      loopinfo->forps[0].incrop, 
			      ast_expr_copy(loopinfo->forps[0].lb),
			      BinaryOperator(BOP_mul,
			        IdentName(loopinfo->varname(LOOP_FITER)),
			        ast_expr_copy(loopinfo->forps[0].step))
			    )
			  ),
			  IdentName(loopinfo->varname(LOOP_LITER)), 
			  Assignment(              /* var +/-= step */
			    Identifier(var), 
			    bop2assop(loopinfo->forps[0].incrop),
			    ast_expr_copy(loopinfo->forps[0].step)
			  ),
			  origbody, 
			  curriter, 
			  NULL
			);
	}

	/* Collapsed nest: recover all indices, innermost loop first */
	recover = AssignStmt(IdentName("pp_"), OneExpr());
	for (lvl = loopinfo->collapsenum - 1; lvl >= 0; lvl--)
	{
		/* var = lb +/- step * ((iter / pp_) % iters) */
		assign = AssignStmt(
		           Identifier(loopinfo->forps[lvl].var),
		           BinaryOperator(
		             loopinfo->forps[lvl].incrop,
		             ast_expr_copy(loopinfo->forps[lvl].lb),
		             BinaryOperator(
		               BOP_mul,
		               ast_expr_copy(loopinfo->forps[lvl].step),
		               Parenthesis(
		                 BinaryOperator(
		                   BOP_mod,
		                   Parenthesis(
		                     BinaryOperator(
		                       BOP_div,
		                       IdentName(loopinfo->varname(LOOP_ITER)),
		                       IdentName("pp_")
		                     )
		                   ),
		                   Identifier(loopinfo->itersym[lvl])
		                 )
		               )
		             )
		           )
		         );
		recover = BlockList(recover, assign);

		/* The outermost loop needs no further scaling of pp_ */
		if (lvl > 0)
			recover = BlockList(
			            recover,
			            Expression(Assignment(IdentName("pp_"), ASS_mul,
			                                  Identifier(loopinfo->itersym[lvl]))
			                      )
			          );
	}
	if (loopinfo->ordplain)
		recover = BlockList(recover, curriter);

	return
		loop_normalize(Symbol(loopinfo->varname(LOOP_ITER)), 
		               IdentName(loopinfo->varname(LOOP_FITER)), NULL, 
		               IdentName(loopinfo->varname(LOOP_LITER)), NULL, origbody,
		               recover, NULL);
}


/**
 * Code for the static schedule without a chunk size: each thread asks
 * _ort_get_static_default_chunk() for its chunk and executes the normalized
 * loop only if the call yields non-zero.
 *
 * @param loopinfo  the loop information
 * @param code      the code parts; decls and mainpart are updated in place
 */
void for_schedule_static(fordata_t *loopinfo, foresult_t *code)
{
	/* Iteration counters and friends */
	code->decls = Block2(code->decls, for_iterdecls(loopinfo));

	/* The normalized loop over [fiter, liter) */
	code->mainpart = Compound(loopinfo->mainpart_func(loopinfo, code->mainpart));

	/* Guard: only run it if we actually got a chunk */
	code->mainpart = 
		If(
		  parse_expression_string(
		    "_ort_get_static_default_chunk(%s, &%s, &%s)",
		    loopinfo->varname(LOOP_NITERS),
		    loopinfo->varname(LOOP_FITER),
		    loopinfo->varname(LOOP_LITER)
		  ),
		  code->mainpart,
		  NULL
		);
}


/**
 * Code for the static schedule with a chunk size. The generated code hands
 * out chunks round-robin:
 *   for (chid_ = omp_get_thread_num(); ; chid_ += TN_) {
 *     fiter = chid_*(chunk);  if (fiter >= niters) break;
 *     liter = fiter + (chunk);  if (liter > niters) liter = niters;
 *     <normalized loop>
 *   }
 *
 * @param loopinfo  the loop information
 * @param code      the code parts; decls and mainpart are updated in place
 */
void for_schedule_static_chunksize(fordata_t *loopinfo,foresult_t *code)
{
	aststmt decls = for_iterdecls(loopinfo), body, bounds;
	char *chsize;
	
	if (!loopinfo->schedchunk || loopinfo->schedchunk->type != CONSTVAL)
	{
		/* Non-constant chunk size: evaluate it once into a variable */
		chsize = CHUNKSIZE;
		decls = BlockList(
		          decls,
		          Declaration(
		            ITERCNT_SPECS,
		            InitDecl(
		              Declarator(NULL, IdentifierDecl(Symbol(chsize))),
		              ast_expr_copy(loopinfo->schedchunk)
		            )
		          )
		        );
	}
	else   /* constant; use its text directly */
		chsize = loopinfo->schedchunk->u.str;

	/* declare: int chid_, TN_=omp_get_num_threads(); */
	decls = BlockList(
	          decls,
	          Declaration(
	            Declspec(SPEC_int),
	            DeclList(
	              Declarator(NULL, IdentifierDecl(Symbol("chid_"))),
	              InitDecl(
	                Declarator(NULL, IdentifierDecl(Symbol("TN_"))),
	                Call0_expr("omp_get_num_threads")
	              )
	            )
	          )
	        );
	code->decls = Block2(code->decls, decls);
  
	/* The normalized loop, then the chunk bounds that precede it */
	body = loopinfo->mainpart_func(loopinfo, code->mainpart);
	bounds = parse_blocklist_string(
	           "%s = chid_*(%s);"
	           "if (%s >= %s) break;"
	           "%s = %s + (%s);"
	           "if (%s > %s) %s = %s;",
	           loopinfo->varname(LOOP_FITER), chsize, 
	           loopinfo->varname(LOOP_FITER), 
	           loopinfo->varname(LOOP_NITERS), 
	           loopinfo->varname(LOOP_LITER), 
	           loopinfo->varname(LOOP_FITER), chsize,
	           loopinfo->varname(LOOP_LITER), 
	           loopinfo->varname(LOOP_NITERS),
	           loopinfo->varname(LOOP_LITER), 
	           loopinfo->varname(LOOP_NITERS)
	         );

	/* The chunk-dispensing loop (no condition; exits through the break) */
	code->mainpart = For(
	                   parse_blocklist_string("chid_ = omp_get_thread_num();"),
	                   NULL,
	                   parse_expression_string("chid_ += TN_"),
	                   Compound(BlockList(bounds, body))
	                 );
}


/**
 * Code for the runtime and auto schedules. The chunk-fetching function is
 * obtained at run time into the get_chunk_ pointer and the normalized loop is
 * executed as long as (*get_chunk_)(...) keeps yielding chunks.
 *
 * NOTE(review): the third argument passed to (*get_chunk_)() is "0" for
 * no/monotonic modifier and "1" otherwise, which is the opposite of what
 * for_schedule_dynamic_or_guided() passes in the same position -- confirm
 * against the runtime which encoding is expected here.
 *
 * @param loopinfo  the loop information
 * @param code      the code parts; decls, prologue and mainpart are updated
 */
void for_schedule_runtime_or_auto(fordata_t *loopinfo, foresult_t *code)
{
	int isruntime = (loopinfo->schedtype == OC_runtime);
	char *stuff_func = isruntime ? "_ort_get_runtime_schedule_stuff" : 
	                               "_ort_get_auto_schedule_stuff";
	char *modflag = (loopinfo->modifier == OCM_none || 
	                 loopinfo->modifier == OCM_monotonic) ? "0" : "1";
	aststmt iterdecls = for_iterdecls(loopinfo), chunkdecl, getdecls, body;
	
	/* We need a declaration for the chunk size; only the auto schedule 
	 * initializes it from the schedule clause expression.
	 */
	if (isruntime)
		chunkdecl = Declaration(
		              ITERCNT_SPECS,
		              Declarator(NULL, IdentifierDecl(Symbol(CHUNKSIZE)))
		            );
	else
		chunkdecl = Declaration(
		              ITERCNT_SPECS,
		              InitDecl(
		                Declarator(NULL, IdentifierDecl(Symbol(CHUNKSIZE))),
		                ast_expr_copy(loopinfo->schedchunk)
		              )
		            );
	iterdecls = BlockList(iterdecls, chunkdecl);
	
	/* chunky_t get_chunk_; */
	getdecls = Declaration(
	             Usertype(Symbol("chunky_t")),
	             Declarator(NULL, IdentifierDecl(Symbol("get_chunk_")))
	           );
	/* Substitute type by hand since the produced code may not get xformed */
	ast_xt_barebones_substitute(&(getdecls->u.declaration.spec),
	                            &(getdecls->u.declaration.decl));
	/* int staticextra_ = -1; */
	getdecls = BlockList(
	             getdecls,
	             Declaration(
	               Declspec(SPEC_int),
	               InitDecl(
	                 Declarator(NULL, IdentifierDecl(Symbol("staticextra_"))),
	                 numConstant(-1)
	               )
	             )
	           );
	code->decls = Block3(code->decls, iterdecls, getdecls);

	/* void *extra_ = &staticextra_; */
	code->decls = BlockList(
	                code->decls,
	                Declaration(
	                  Declspec(SPEC_void),
	                  InitDecl(
	                    Declarator(Declspec(SPEC_star),
	                               IdentifierDecl(Symbol("extra_"))),
	                    UOAddress(IdentName("staticextra_"))
	                  )
	                )
	              );
			
	/* _ort_get_{runtime|auto}_schedule_stuff(&get_chunk_, &chunksize_); */
	code->prologue = BlockList(
	                   code->prologue,
	                   FuncCallStmt(
	                     IdentName(stuff_func),
	                     CommaList(
	                       UOAddress(IdentName("get_chunk_")),
	                       UOAddress(IdentName(CHUNKSIZE))	
	                     )
	                   )
	                 );

	/* _ort_loop_initializations(niters, &chunksize_, &extra_, <is runtime>); */
	code->prologue = BlockList(
	                   code->prologue,
	                   FuncCallStmt(
	                     IdentName("_ort_loop_initializations"),
	                     Comma4(
	                       IdentName(loopinfo->varname(LOOP_NITERS)),
	                       UOAddress(IdentName(CHUNKSIZE)),
	                       UOAddress(IdentName("extra_")),
	                       numConstant(isruntime ? 1 : 0)
	                     )
	                   )
	                 );

	/* Keep executing the normalized loop while chunks are handed out */
	body = loopinfo->mainpart_func(loopinfo, code->mainpart);
	code->mainpart = 
	       While(
	         parse_expression_string(  /* Too big to do it by hand */
	           "(*get_chunk_)(%s, %s, %s, &%s, &%s, extra_)", 
	           loopinfo->varname(LOOP_NITERS), CHUNKSIZE,
	           modflag,
	           loopinfo->varname(LOOP_FITER), loopinfo->varname(LOOP_LITER)
	         ),
	         Compound(body)
	       );
}


/**
 * Code for the dynamic and guided schedules: the normalized loop is executed
 * while _ort_get_dynamic_chunk() / _ort_get_guided_chunk() yields chunks.
 * The chunk size passed is the constant text, the CHUNKSIZE variable (for
 * non-constant expressions) or "1" when the clause has no chunk size; the
 * third argument is "1" for no/monotonic modifier and "0" otherwise.
 *
 * @param loopinfo  the loop information
 * @param code      the code parts; decls and mainpart are updated in place
 */
void for_schedule_dynamic_or_guided(fordata_t *loopinfo, foresult_t *code)
{
	aststmt decls = for_iterdecls(loopinfo), body;
	char *chsize = "1";       /* used when no chunk size was given */
	char *monoflag = "1";
	
	if (loopinfo->schedchunk && loopinfo->schedchunk->type == CONSTVAL)
		chsize = loopinfo->schedchunk->u.str;
	else if (loopinfo->schedchunk)
	{
		/* Non-constant chunk size: evaluate it once into a variable */
		chsize = CHUNKSIZE;
		decls = BlockList(
		          decls,
		          Declaration(
		            ITERCNT_SPECS,
		            InitDecl(
		              Declarator(NULL, IdentifierDecl(Symbol(chsize))),
		              ast_expr_copy(loopinfo->schedchunk)
		            )
		          )
		        );
	}
	code->decls = BlockList(code->decls, decls);
	
	if (loopinfo->modifier != OCM_none && loopinfo->modifier != OCM_monotonic)
		monoflag = "0";

	body = loopinfo->mainpart_func(loopinfo, code->mainpart);
	code->mainpart = 
	       While(
	         parse_expression_string(
	           "_ort_get_%s_chunk(%s,%s,%s,&%s,&%s,(int*)0)",
	           loopinfo->schedtype == OC_guided ? "guided" : "dynamic",
	           loopinfo->varname(LOOP_NITERS), chsize,
	           monoflag,
	           loopinfo->varname(LOOP_FITER), loopinfo->varname(LOOP_LITER)
	         ),
	         Compound(body)
	       );
}


/**
 * Transform an OpenMP for construct (plain or combined with parallel).
 *
 * Possible clauses:
 * private, firstprivate, lastprivate, reduction, nowait, ordered, schedule,
 * collapse.
 *
 * The construct is replaced by a compound statement consisting of: the
 * directive in comments, declarations, initializations, a prologue (number 
 * of iterations and runtime entry call), the schedule-specific main part 
 * and an epilogue (cancel label, runtime exit call, lastprivate/reduction 
 * code, barrier or fence).
 *
 * @param t  the statement holding the construct; replaced (and the original
 *           node freed) on return
 */
void xform_for(aststmt *t)
{
	xform_ompcon_body((*t)->u.omp);     /* The body gets transformed first */
	aststmt   s = (*t)->u.omp->body, parent = (*t)->parent, v, 
	          lasts = NULL, reds = NULL, redarrinits = NULL, redfree = NULL, 
	          treds = NULL, stmp, embdcls = NULL, arrsecxvars = NULL;
	forparts_t forps[MAXLOOPS];
	astexpr   expr;
	symbol    itersym[MAXLOOPS];
	int       schedtype = OC_static /* default */, modifer = OCM_none,
	          static_chunk = 0, i = 0, collapsenum = 1, doacrossnum = 0, nestnum;
	bool      ispfor = ((*t)->u.omp->type == DCFOR_P);
	bool      haslast = false, hasboth = false, hasred = false;
	astexpr   schedchunk = NULL;    /* the chunksize expression */
	char      *chsize = NULL,       /* the chunksize value (if constant) */
	          iterstr[128], clabel[22];
	ompclause nw  = xc_ompcon_get_clause((*t)->u.omp, OCNOWAIT),
	          sch = xc_ompcon_get_clause((*t)->u.omp, OCSCHEDULE),
	          ord = xc_ompcon_get_clause((*t)->u.omp, OCORDERED),
	          ordnum = xc_ompcon_get_clause((*t)->u.omp, OCORDEREDNUM),
	          col = xc_ompcon_get_clause((*t)->u.omp, OCCOLLAPSE),
	          tag = xc_ompcon_get_clause((*t)->u.omp, OCTAG);
	bool      needbarrier = (nw == NULL &&
	                         xform_implicit_barrier_is_needed((*t)->u.omp));
	symtab    dvars;
	fordata_t info = { 0 };
	foresult_t code = { NULL };


	v = ompdir_commented((*t)->u.omp->directive); /* Put directive in comments */
	
	/*
	 * Preparations
	 */

	/* The name of the label used for canceling. We use line number to avoid
	 * conflicts
	 */
	snprintf(clabel, sizeof(clabel), "CANCEL_for_%d", (*t)->u.omp->l);

	if (sch)
	{
		schedtype  = sch->subtype;      /* OC_static, OC_... */
		schedchunk = sch->u.expr;
		if (schedtype == OC_static && schedchunk)
			static_chunk = 1;
		if (schedtype == OC_affinity && schedchunk)
			schedchunk = ast_expr_copy(schedchunk);
		/* Optimize: if schedchunk is a constant, don't use a variable for it */
		if (schedchunk && schedchunk->type == CONSTVAL)
			chsize = strdup(schedchunk->u.str);    /* memory leak */
		modifer = sch->modifier;
	}

	if (ord && modifer == OCM_nonmonotonic)
		exit_error(1, "(%s, line %d) openmp error:\n\t"
		     "nonmonotonic schedules are not allowed along with ordered clauses.\n",
		     (*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l);
	
	if (ord && ordnum)
		exit_error(1, "(%s, line %d) openmp error:\n\t"
		     "plain ordered clauses are not allowed in doacross loops.\n",
		     (*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l);

	if (col)
	{
		if ((collapsenum = col->subtype) >= MAXLOOPS)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
				"cannot collapse more than %d FOR loops.\n",
				(*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l,MAXLOOPS);
	}

	if (ordnum)
	{
		if ((doacrossnum = ordnum->subtype) >= MAXLOOPS)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
				"doacross loop nests should have up to %d FOR loops.\n",
				(*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l,MAXLOOPS);
		if (doacrossnum < collapsenum)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
		             "doacross loop collapse number cannot be larger "
		             "than its ordered number.\n",
		             (*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l);
	}
	
	/* Collect all data clause vars - we need to check if any vars
	 * are both firstprivate and lastprivate
	 */
	dvars = xc_validate_store_dataclause_vars((*t)->u.omp->directive);

	/* Analyze the loop(s) */
	nestnum = (doacrossnum > collapsenum) ? doacrossnum : collapsenum;
	loopnest_analyze(s, nestnum, collapsenum, forps, *t, dvars, &embdcls);
	
	/* Prepare the loop info (haslast is filled in later, once it is known) */
	info.ordplain = (ord != NULL);
	info.collapsenum = collapsenum;
	info.doacrossnum = doacrossnum;
	info.schedtype = schedtype;
	info.schedchunk = schedchunk;
	info.modifier = modifer;     /* the schedule generators consult this */
	info.forps = forps;
	info.itersym = itersym;
	info.mainpart_func = for_std_mainpart;
	info.varname = for_varnames;

	/* Remember the last loop; form normalized iteration variables */
	s = forps[collapsenum-1].s;
	for (i = 0; i < nestnum; i++)
	{
		/* Bounded formatting: a very long variable name must not overflow */
		if (snprintf(iterstr, sizeof(iterstr), "iters_%s_", forps[i].var->name)
		      >= (int) sizeof(iterstr))
			exit_error(1, "(%s, line %d) ompi error:\n\t"
			     "loop variable name is too long.\n",
			     (*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l);
		itersym[i] = Symbol(iterstr); /* Remember the normalized iteration index */
	}
	
	/*
	 * Declarations and initializations
	 */
	
	/* get possibly new variables for array section parameters */
	arrsecxvars = red_arrayexpr_simplify((*t)->u.omp->directive);

	/* declarations from the collected vars (not the clauses!) */
	code.decls = verbit("/* declarations (if any) */");
	stmp = xc_stored_vars_declarations(&haslast, &hasboth, &hasred);
	info.haslast = haslast;      /* only now is haslast known */
	if (stmp)
		code.decls = Block2(code.decls, stmp);
	if (arrsecxvars)
		code.decls = Block2(arrsecxvars, code.decls);
	if (embdcls)
		code.decls = BlockList(code.decls, embdcls);

	/* initialization statements for firstprivate non-scalar vars */
	code.inits = verbit("/* initializations (if any) */");
	if ((stmp = xc_ompdir_fiparray_initializers((*t)->u.omp->directive)) != NULL)
		code.inits = Block2(code.inits, stmp);
	
	/* assignments for lastprivate vars */
	if (haslast)
		lasts = xc_ompdir_lastprivate_assignments((*t)->u.omp->directive);
	if (hasred)
	{
		/* Temporary local variables should be kept till the reduction operation
		 * is fully completed; this is guaranteed after a barrier, so we must
		 * turn off any barrier removals.
		 * TODO: maybe we should re-design reductions...
		 */
		if (!oldReduction)
			needbarrier = true;
		/* Initializers for array reductions */
		if ((redarrinits = red_array_initializers_from_ompstmt(*t)) != NULL)
			code.inits = BlockList(code.inits,redarrinits);
		/* Code to do the reductions */
		reds = red_generate_code_from_ompstmt(*t);
		/* Possible de-allocations to go after the barrier */
		redfree = red_generate_deallocations_from_ompstmt(*t);
		/* Code for tasking reductions, if any */
		treds = tred_register_from_ompstmt(*t);
		if (treds)
		{
			code.inits = BlockList(code.inits, treds); /* register task reductions */
			/* Quick and dirty: barrier + scope just before the actual reductions */
			reds = Block4(
			         verbit("/* end of task reductions scope */"),
			         BarrierCall(), 
			         Call0_stmt("_ort_taskscope_end"), 
			         reds
			       );
		}
	}


	/*
	 * Prologue
	 */
	
	/* Append our new code: niters_ = ...; _ort_entering_for(...); */
	if (collapsenum == 1 && doacrossnum == 0)
		expr = CastLong(loop_iters(&forps[0]));
	else   /* product of the iteration counts of the collapsed loops */
		for (expr = Identifier(itersym[0]), i = 1; i < collapsenum; i++)
			expr = BinaryOperator(BOP_mul, expr, Identifier(itersym[i]));

	if (ordnum)               /* Need more info for doacross loops */
		stmp = Expression(      /* _ort_entering_doacross(nw,doacnum,collnum,...); */
	           FunctionCall(
	             IdentName("_ort_entering_doacross"),
	             Comma6(
	               numConstant(nw ? 1 : 0),
	               numConstant(doacrossnum),
	               numConstant(collapsenum),
	               numConstant(FOR_CLAUSE2SCHED(schedtype, static_chunk)),
	               /* chsize is only set for constant chunks; otherwise the
	                * schedule code declares the CHUNKSIZE variable.
	                */
	               schedchunk ? IdentName(chsize ? chsize : CHUNKSIZE) : 
	                            numConstant(-1),
	               IdentName(DOACCPARAMS)
	             )
	           )
	         );
	else
		stmp = Expression(      /* _ort_entering_for(nw,ord); */
	           FunctionCall(
	             IdentName("_ort_entering_for"),
	             Comma2(numConstant(nw ? 1 : 0), numConstant(ord ? 1 : 0))
	           )
	         );

	stmp = BlockList(
	         Expression(     /* niters_ = ... */
	           Assignment(IdentName(info.varname(LOOP_NITERS)), ASS_eq, expr)
	         ),
	         stmp
	       );
	if (hasboth)   /* a var is both fip & lap; this needs a barrier here :-( */
		stmp = BlockList(stmp, BarrierCall());
	
	code.prologue = stmp;    /* Guaranteed to be non-NULL */

	#ifdef OMPI_OMP_EXT
	if (tag)
		code.prologue = BlockList(code.prologue, xform_tag_clause(tag->u.expr));
	#endif

	/*
	 * Main part
	 */
	
	/* Just leave the original body and let the schedules utilize it */
	code.mainpart = s->body;
	
	/*
	 * Epilogue
	 */
	
	/* Add a label that is used when canceling */
	code.epilogue = Labeled(Symbol(clabel), Expression(NULL));
	if (!ispfor || ord || ordnum)   /* Still need it if ordered clause exists */
		code.epilogue = BlockList(code.epilogue, Call0_stmt("_ort_leaving_for"));
	/* Add lastprivate assignments */
	if (lasts)
	{
		if (collapsenum > 1)
		{
			aststmt idx;
		
			/* Advance every collapsed index by one step before the assignments */
			idx = Expression(Assignment(Identifier(forps[0].var), 
			                            bop2assop(forps[0].incrop), 
			                            ast_expr_copy(forps[0].step)));
			for (i = 1; i < collapsenum; i++)
				idx = BlockList(
				        idx,
				        Expression(Assignment(Identifier(forps[i].var), 
				                              bop2assop(forps[i].incrop), 
				                              ast_expr_copy(forps[i].step))
				        )
				      );
			lasts = BlockList(idx, lasts);
		}

		/* if (iter && iter == niters) <lastprivate assignments> */
		code.epilogue = 
			BlockList(
			  code.epilogue,
			  If(
			    BinaryOperator(BOP_land,
			      IdentName(info.varname(LOOP_ITER)),
			      BinaryOperator(BOP_eqeq,
			        IdentName(info.varname(LOOP_ITER)),
			        IdentName(info.varname(LOOP_NITERS))
			      )
			    ),
			    lasts->type == STATEMENTLIST ?  Compound(lasts) : lasts,
			    NULL
			  )
			);
	}
	/* Add reduction assignments */
	if (reds)
		code.epilogue = BlockList(code.epilogue, reds);
	if (needbarrier)
		code.epilogue = BlockList(code.epilogue, BarrierCall());
	else
		if (!nw)   /* We ditched the barrier; but should at least flush */
			code.epilogue = BlockList(code.epilogue, Call0_stmt("_ort_fence")); 
	if (redfree)
		code.epilogue = BlockList(code.epilogue, redfree);
	if (tag)
		code.epilogue = BlockList(code.epilogue, Call0_stmt("_ort_pop_tag"));

	/*
	 * Get loop specific code and combine the parts
	 */
	
	/* schedule-specific actions */
	switch (schedtype)
	{
		case OC_static:
			if (schedchunk)
				for_schedule_static_chunksize(&info, &code);
			else
				for_schedule_static(&info, &code);
			break;
		case OC_dynamic:
		case OC_guided:
			for_schedule_dynamic_or_guided(&info, &code);
			break;
		case OC_runtime:
		case OC_auto:
			for_schedule_runtime_or_auto(&info, &code);
			break;
		default:
			/* Without schedule code the bare loop body would be emitted with
			 * no loop around it; refuse instead of producing wrong code.
			 */
			exit_error(1, "(%s, line %d) ompi error:\n\t"
			     "unsupported schedule type in FOR construct.\n",
			     (*t)->u.omp->directive->file->name, (*t)->u.omp->directive->l);
			break;
	}
	
	(*t)->u.omp->body = NULL;     /* Make it NULL so as to free it easily */
	ast_free(*t);                 /* Get rid of the OmpStmt */
	*t = Block6(v, code.decls, code.inits, code.prologue, code.mainpart, 
	            code.epilogue);
	*t = Compound(*t);
	ast_stmt_parent(parent, *t);

}


/**
 * Placeholder for the combined "for simd" construct: nothing is transformed;
 * a notice is printed on stderr and the statement is left untouched.
 *
 * @param t  the statement holding the construct (unused)
 */
void xform_forsimd(aststmt *t)
{
	(void) t;
	fputs("#pragma omp for simd: unsupported; please wait a bit...\n", stderr);
}
