/*
  OMPi OpenMP Compiler
  == Copyright since 2001 the OMPi Team
  == Dept. of Computer Science & Engineering, University of Ioannina

  This file is part of OMPi.

  OMPi is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  OMPi is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OMPi; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/

/* x_distribute.c */

#include <string.h>
#include <assert.h>
#include "stddefs.h"
#include "x_distribute.h"
#include "x_loops.h"
#include "x_clauses.h"
#include "x_reduction.h"
#include "ast_types.h"
#include "ast_xform.h"
#include "ast_free.h"
#include "ast_copy.h"
#include "ast_print.h"
#include "ast_assorted.h"
#include "ast_arith.h"
#include "str.h"
#include "ompi.h"

#define NORMALIZEDITER_DIST "dist_iter_"


/**
 * Transform a for loop statement in a slightly different way,
 * appropriate for distribute-related constructs.
 *
 * The OpenMP statement *t is freed and replaced by a compound statement
 * that holds, in order: the commented-out directive, the needed
 * declarations and initializations, "niters_ = (long) (dist_liter_ -
 * dist_fiter_)", the call to ort_entering_for() / ort_entering_doacross(),
 * the schedule-dependent code that wraps the rewritten loop, the cancel
 * label, and the lastprivate / reduction / barrier epilogue.
 *
 * The generated code refers to dist_fiter_, dist_liter_ and the
 * dist_iters_<var>_ counters without declaring them here; presumably they
 * are declared by the enclosing distribute transformation -- TODO confirm
 * against the caller.
 *
 * @param t  the OpenMP for statement to transform (replaced in place)
 */
static
void _xform_for_fordist(aststmt *t)
{
	aststmt   s = (*t)->u.omp->body, parent = (*t)->parent, v, realbody,
	          decls, inits = NULL, lasts = NULL, reds = NULL, redarrinits = NULL,
	          redfree = NULL, stmp, embdcls = NULL, arrsecxvars = NULL;
	astexpr   lb, ub, step, lbs[MAXLOOPS], steps[MAXLOOPS],
	          expr, elems;
	symbol    var, realvar, vars[MAXLOOPS];
	int       incrop, condop, stepdir[MAXLOOPS];
	int       schedtype = OC_static /* default */, modifer = OCM_none,
	          static_chunk = 0, i = 0, collapsenum = 1, doacrossnum = 0, nestnum;
	bool      ispfor = ((*t)->u.omp->type == DCFOR_P);
	bool      haslast, hasboth, hasred;
	astexpr   schedchunk = NULL;    /* the chunksize expression */
	char      *chsize = NULL,       /* the chunksize value or variable */
	          iters[MAXLOOPS][128],
	          plainstep,
	          plainsteps[MAXLOOPS],
	          clabel[22];
	ompclause nw  = xc_ompcon_get_clause((*t)->u.omp, OCNOWAIT),
	          sch = xc_ompcon_get_clause((*t)->u.omp, OCSCHEDULE),
	          ord = xc_ompcon_get_clause((*t)->u.omp, OCORDERED),
	          ordnum = xc_ompcon_get_clause((*t)->u.omp, OCORDEREDNUM),
	          col = xc_ompcon_get_clause((*t)->u.omp, OCCOLLAPSE);
	bool      needbarrier = (nw == NULL &&
	                         xform_implicit_barrier_is_needed((*t)->u.omp));
	symtab    dvars;
	stentry   varentry;

	v = ompdir_commented(OmpStmtDir(*t)); /* Put directive in comments */

	/*
	 * Preparations
	 */

	/* The name of the label used for canceling. We use line number to avoid
	 * conflicts
	 */
	snprintf(clabel, sizeof(clabel), "CANCEL_for_%d", (*t)->u.omp->l);

	if (sch)
	{
		if (sch->subtype == OC_auto)      /* Implement AUTO as STATIC for now */
			schedtype = OC_static;          /* but we will ignore any chunksize */
		else
			schedtype  = sch->subtype;      /* OC_static, OC_... */
		schedchunk = sch->u.expr;
		if (schedtype == OC_static && sch->subtype != OC_auto && schedchunk)
			static_chunk = 1;
		/* Take a private copy since the clause is freed along with *t */
		if (schedtype == OC_affinity && schedchunk)
			schedchunk = ast_expr_copy(schedchunk);
		/* Optimize: if schedchunk is a constant, don't use a variable for it */
		if (schedchunk && schedchunk->type == CONSTVAL)
			chsize = strdup(schedchunk->u.str);    /* memory leak */
		modifer = sch->modifier;
	}

	if (ord && modifer == OCM_nonmonotonic)
		exit_error(1, "(%s, line %d) openmp error:\n\t"
		     "nonmonotonic schedules are not allowed along with ordered clauses.\n",
		     OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l);

	if (ord && ordnum)
		exit_error(1, "(%s, line %d) openmp error:\n\t"
		     "plain ordered clauses are not allowed in doacross loops.\n",
		     OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l);

	if (col)
	{
		if ((collapsenum = col->subtype) >= MAXLOOPS)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
				"cannot collapse more than %d FOR loops.\n",
				OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l,MAXLOOPS);
	}

	if (ordnum)
	{
		if ((doacrossnum = ordnum->subtype) >= MAXLOOPS)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
				"doacross loop nests should have up to %d FOR loops.\n",
				OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l,MAXLOOPS);
		if (doacrossnum < collapsenum)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
		             "doacross loop collapse number cannot be larger "
		             "than its ordered number.\n",
		             OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l);
	}

	/* Collect all data clause vars - we need to check if any vars
	 * are both firstprivate and lastprivate
	 */
	dvars = xc_validate_store_dataclause_vars(OmpStmtDir(*t));

	/* Analyze the loop(s); we walk as deep as the larger of the collapse
	 * and the doacross (ordered(n)) depths.
	 */
	nestnum = (doacrossnum > collapsenum) ? doacrossnum : collapsenum;
	i = 0;
	do
	{
		analyze_omp_for(s, &var, &lb, &ub, &step, &condop, &incrop);

		/* First check if the loop variable has been enlisted; if not,
		 * it is automatically considered private (v25) - so we make it
		 * appear as if there was a private(var) clause.
		 */
		if ((varentry = symtab_get(dvars, var, IDNAME)) == NULL)
		{
			if (s->u.iteration.init->type == EXPRESSION)
				symtab_put(dvars, var, IDNAME)->ival = OCPRIVATE;
			else
				/* The variable is declared in the for-init; keep a copy of the
				 * declaration so that we can emit it before the new loop.
				 */
				embdcls = (embdcls) ?
					BlockList(
						embdcls,
						Declaration( /* without the initializer */
							ast_spec_copy(s->u.iteration.init->u.declaration.spec),
							ast_decl_copy(s->u.iteration.init->u.declaration.decl->decl)
						)
					) :
					Declaration(
						ast_spec_copy(s->u.iteration.init->u.declaration.spec),
						ast_decl_copy(s->u.iteration.init->u.declaration.decl->decl)
					);
		}
		else
		{
			if (s->u.iteration.init->type != EXPRESSION)  /* a declaration */
				exit_error(1, "(%s, line %d) openmp error:\n\t"
					"iteration variable '%s' is declared within the FOR statement\n\t"
					"and thus it cannot appear in the directive's data clauses.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l, var->name);
			/* Remove the FIRSTPRIVATE attribute if any (there is no use for it) */
			/* Actually, v25 (p.64,l.23) specifies that the iteration variable
			 * can only appear in a PRIVATE or LASTPRIVATE clause, so we should
			 * emit at least a warning.
			 */
			if (varentry->ival == OCFIRSTPRIVATE || varentry->ival == OCFIRSTLASTPRIVATE)
				warning("(%s, line %d) warning:\n\t"
					"iteration variable '%s' cannot appear in a FIRSTPRIVATE clause..\n\t"
					"  .. let's pretend it was in a PRIVATE clause.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l, var->name);
			if (varentry->ival == OCFIRSTPRIVATE)
				varentry->ival = OCPRIVATE;
			else
				if (varentry->ival == OCFIRSTLASTPRIVATE)
					varentry->ival = OCLASTPRIVATE;
		}

		if (step == NULL || step->type == CONSTVAL)   /* ++/-- or += constant */
		{
			plainstep = (step == NULL) ? 1 : 2;
			step = (step == NULL) ? OneExpr() : ast_expr_copy(step);
		}
		else /* step != NULL && general expression for step */
		{
			step = Parenthesis(ast_expr_copy(step));   /* An expression */
			plainstep = 0;
		}

		/* Note: the upper bound is not recorded; the iteration count is not
		 * computed here but taken from dist_liter_ - dist_fiter_ (see below).
		 */
		vars[i] = var;
		lbs[i]  = Parenthesis(ast_expr_copy(lb));
		steps[i] = step;
		stepdir[i] = incrop;
		/* Bounded write: the variable name comes from user code */
		snprintf(iters[i], sizeof(iters[i]), "dist_iters_%s_", var->name);
		plainsteps[i] = plainstep;      /* TODO: actually use this info */

		if (i == collapsenum-1)
		{
			realbody = s;                 /* Remember where the real body is */
			realvar = var;
		}
		if (i < nestnum - 1)
		{
			s = s->body;
			if (s != NULL && s->type == COMPOUND && s->body != NULL &&
			    s->body->type == ITERATION && s->body->subtype == SFOR)
				s = s->body;  /* { For } -> For */
			if (s == NULL || s->type != ITERATION || s->subtype != SFOR)
				exit_error(1, "(%s, line %d) openmp error:\n\t"
					"%d perfectly nested FOR loops were expected.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l,
					nestnum);
		}
	}
	while ((++i) < nestnum);
	s = realbody;
	var = realvar;

	/* get possibly new variables for array section parameters */
	arrsecxvars = red_arrayexpr_simplify(OmpStmtDir(*t));

	/* declarations from the collected vars (not the clauses!) */
	decls = xc_stored_vars_declarations(&haslast, &hasboth, &hasred);
	if (arrsecxvars)
		decls = decls ? Block2(arrsecxvars, decls) : arrsecxvars;
	/* initialization statements for firstprivate non-scalar vars */
	if (decls)
		inits = xc_ompdir_fiparray_initializers(OmpStmtDir(*t));
	/* assignments for lastprivate vars */
	if (haslast)
		lasts = xc_ompdir_lastprivate_assignments(OmpStmtDir(*t));
	if (hasred)
	{
		/* Temporary local variables should be kept till the reduction operation
		 * is fully completed; this is guaranteed if after a barrier, so we must
		 * turn off any barrier removals.
		 * TODO: maybe we should re-design reductions...
		 */
		if (!oldReduction)
			needbarrier = true;
		/* Initializers for array reductions */
		redarrinits = red_array_initializers_from_ompstmt(*t);
		if (redarrinits)
			inits = (inits) ? BlockList(inits, redarrinits) : redarrinits;
		/* Code to do the reductions */
		reds = red_generate_code_from_ompstmt(*t);
		/* Possible de-allocations to go after the barrier */
		redfree = red_generate_deallocations_from_ompstmt(*t);
	}

	/* Start collecting our own bookkeeping declarations in stmp */
	stmp  = (embdcls) ? embdcls : verbit(" ");
	if (schedtype == OC_affinity)
	{
		if (haslast)
			stmp = BlockList(
			         stmp,
			         Declaration(    /* declare: <specs> niters_,iter_=1; */
			           ITERCNT_SPECS,
			           DeclList(
			             Declarator(NULL, IdentifierDecl(Symbol("niters_"))),
			             InitDecl(
			               Declarator(NULL, IdentifierDecl(Symbol(NORMALIZEDITER))),
			               OneExpr()
			             )
			           )
			         )
			       );
	}
	else
	{
		stmp = BlockList(
		         stmp,  /* Initialize because if a thread gets no iterations, the */
		         Declaration(  /* lastprivate check for iter==niters may succeed! */
		           ITERCNT_SPECS,  /*  <specs> niters_=0,iter_=0,fiter_,liter_=0; */
		           DeclList(
		             DeclList(
		               DeclList(
		                 InitDecl(
		                   Declarator(NULL, IdentifierDecl(Symbol("niters_"))),
		                   ZeroExpr()
		                 ),
		                 InitDecl(
		                   Declarator(NULL, IdentifierDecl(Symbol(NORMALIZEDITER))),
		                   ZeroExpr()
		                 )
		               ),
		               Declarator(NULL, IdentifierDecl(Symbol("fiter_")))
		             ),
		             InitDecl(
		               Declarator(NULL, IdentifierDecl(Symbol("liter_"))),
		               ZeroExpr()
		             )
		           )
		         )
		       );
		/* pp_ is the running product used to decode collapsed loop indices */
		if (collapsenum > 1)
			stmp = BlockList(
			         stmp,
			         Declaration(ITERCNT_SPECS,
			                     InitDecl(
			                       Declarator(NULL, IdentifierDecl(Symbol("pp_"))),
			                       OneExpr()
			                     ))
			       );
		if (chsize == NULL)
		{
			chsize = "chunksize_";
			if (schedchunk || schedtype == OC_runtime)     /* expr for chunk size */
			{
				stmp = BlockList(
				         stmp,
				         Declaration(
				           ITERCNT_SPECS,
				           (schedtype == OC_runtime) ?
				             Declarator(NULL, IdentifierDecl(Symbol(chsize))) :
				             InitDecl(
				               Declarator(NULL, IdentifierDecl(Symbol(chsize))),
				               ast_expr_copy(schedchunk)
				             )
				         )
				       );
			}
		}
		/* we may need 2 more variables (int) */
		if (static_chunk)
			stmp = BlockList(
			         stmp,
			         Declaration( /* declare: int chid_, TN_=omp_get_num_threads(); */
			           Declspec(SPEC_int),
			           DeclList(
			             Declarator(NULL, IdentifierDecl(Symbol("chid_"))),
			             InitDecl(
			               Declarator(NULL, IdentifierDecl(Symbol("TN_"))),
			               Call0_expr("omp_get_num_threads")
			             )
			           )
			         )
			       );
		/* we may need 2 more variables: get_chunk_ & staticextra_ */
		if (schedtype == OC_runtime)
		{
			aststmt xdc = Declaration(
			                Usertype(Symbol("chunky_t")),
			                Declarator(NULL, IdentifierDecl(Symbol("get_chunk_")))
			              );
			/* Substitute type by hand since the produced code may not get xformed */
			ast_xt_barebones_substitute(&(xdc->u.declaration.spec),
			                        &(xdc->u.declaration.decl));
			xdc = BlockList(
			        xdc,
			        Declaration(
			          Declspec(SPEC_int),
			          InitDecl(
			            Declarator(NULL, IdentifierDecl(Symbol("staticextra_"))),
			            numConstant(-1)
			          )
			        )
			      );
			stmp = BlockList(stmp, xdc);
		}
	}

	/* Finally, we need the loop parameters for doacross loops */
	if (ordnum)
	{
		/* Form the initializer */
		elems = LongArray3Initer(lbs[0], steps[0], stepdir[0], IdentName(iters[0]));
		for (i = 1; i < doacrossnum; i++)
			elems =
				CommaList(
					elems,
					LongArray3Initer(lbs[i], steps[i], stepdir[i], IdentName(iters[i]))
				);
		/* Declare and initialize _doacc_params_[][3] */
		stmp = BlockList(
				stmp,
				Declaration(
					Declspec(SPEC_long),
					InitDecl(
						Declarator(
							NULL,
							ArrayDecl(
								ArrayDecl(IdentifierDecl(Symbol(DOACCPARAMS)),NULL,NULL),
								NULL,
								Constant("3")
							)
						),
						BracedInitializer(elems)
					)
				)
			);
	}

	decls = (decls) ? BlockList(decls, stmp) : stmp;

	/*
	 * Do the job
	 */

	(*t)->u.omp->body = NULL;     /* Make it NULL so as to free it easily */
	ast_free(*t);                 /* Get rid of the OmpStmt */

	if (inits)
		decls = BlockList(decls, inits);  /* Append the initialization statements */

	/* Append our new code: niters_ = ...; ort_entering_for(...); */

	/* The iteration count is the size of the current distribute chunk:
	 *   (long) ((dist_liter_) - (dist_fiter_))
	 */
	expr = CastLong(
		Parenthesis(
			BinaryOperator(BOP_sub,
				Parenthesis(
					IdentName("dist_liter_")
				),
				Parenthesis(
					IdentName("dist_fiter_")
				)
			)
		)
	);

	if (ordnum)               /* Need more info for doacross loops */
		stmp = Expression(      /* ort_entering_doacross(nw,doacnum,collnum,...); */
	           FunctionCall(
	             IdentName("ort_entering_doacross"),
	             Comma6(
	               numConstant(nw ? 1 : 0),
	               numConstant(doacrossnum),
	               numConstant(collapsenum),
	               numConstant(FOR_CLAUSE2SCHED(schedtype, static_chunk)),
	               schedchunk ? IdentName(chsize) : numConstant(-1),
	               IdentName(DOACCPARAMS)
	             )
	           )
	         );
	else
		stmp = Expression(      /* ort_entering_for(nw,ord); */
	           FunctionCall(
	             IdentName("ort_entering_for"),
	             Comma2(numConstant(nw ? 1 : 0), numConstant(ord ? 1 : 0))
	           )
	         );


	stmp = BlockList(
	         Expression(     /* niters_ = ... */
	           Assignment(IdentName("niters_"), ASS_eq, expr)
	         ),
	         stmp
	       );
	stmp = BlockList(decls, stmp);
	if (hasboth)   /* a var is both fip & lap; this needs a barrier here :-( */
		stmp = BlockList(stmp, BarrierCall());

	/* Prepare the main loop */
	if (schedtype == OC_affinity)
	{
		/* same loop, new body */
		if (lasts || ord)
		{
			if (lasts)
				/* Count executed iterations using the very same counter that was
				 * declared above and is tested in the lastprivate check below.
				 */
				s->body = BlockList(   /* iter_++ */
				            Expression(
				              PostOperator(IdentName(NORMALIZEDITER), UOP_inc)
				            ),
				            s->body
				          );
			s->body = Compound(s->body);
		}
		s->body = If(
		            FunctionCall(
		              IdentName("ort_affine_iteration"), schedchunk
		            ),
		            s->body,
		            NULL
		          );
	}
	else
	{
		aststmt idx = NULL;

		if (collapsenum > 1)
		{
			/* Recover each original index from the normalized one, going from
			 * the innermost to the outermost loop:
			 *   pp_ = 1;
			 *   <var_i> = lb_i +/- step_i * (((iter_+dist_fiter_) / pp_) % iters_i);
			 *   pp_ *= iters_i;
			 */
			idx = AssignStmt(IdentName("pp_"), OneExpr());
			for (i = collapsenum - 1; i >= 0; i--)
			{
				idx = BlockList(
				        idx,
				        AssignStmt(
				          Identifier(vars[i]),
				          BinaryOperator(
				            stepdir[i], //BOP_add,
				            ast_expr_copy(lbs[i]),
				            BinaryOperator(
				              BOP_mul,
				              ast_expr_copy(steps[i]),
				              Parenthesis(
				                BinaryOperator(
				                  BOP_mod,
				                  Parenthesis(
				                    BinaryOperator(
				                      BOP_div,
				                      Parenthesis(
				                        BinaryOperator(
				                          BOP_add,
				                          IdentName(NORMALIZEDITER),
				                          IdentName("dist_fiter_")
				                        )
				                      ),
				                      IdentName("pp_")
				                    )
				                  ),
				                  IdentName(iters[i])
				                )
				              )
				            )
				          )
				        )
				      );
				if (i != 0)
					idx = BlockList(
					        idx,
					        Expression(Assignment(IdentName("pp_"), ASS_mul,
					                              IdentName(iters[i]))
					                  )
					      );
			}
		}
		/* Loop becomes:
		 *   for (iter = fiter; iter < liter; iter++) {
		 *     <var> = lb +/- (dist_fiter + iter)*step
		 *     <body>
		 *   }
		 * optimized as:
		 *   for (iter = fiter, var = lb +/- (dist_fiter + fiter)*step;
		 *        iter < liter; iter++, var +/-= step) {
		 *     <body>
		 *   }
		 * The offset of the distribute chunk (dist_fiter_) is an iteration
		 * index, exactly like fiter_, so both must be scaled by the step.
		 * If there is an ordered clause, we insert "ort_for_curriter(iter_)"
		 * just before the body, to let the runtime know our current iteration.
		 */

#define ORTCURRITER \
     Expression(FunctionCall(IdentName("ort_for_curriter"), \
     IdentName(NORMALIZEDITER)))

		s = For(Expression((collapsenum > 1) ?
		                   Assignment(IdentName(NORMALIZEDITER),
		                              ASS_eq,
		                              IdentName("fiter_"))
		                   :
		                   CommaList(
		                     Assignment(IdentName(NORMALIZEDITER),
		                                ASS_eq,
		                                IdentName("fiter_")
		                               ),
		                     Assignment(
		                       Identifier(var),
		                       ASS_eq,
		                       BinaryOperator(
		                         stepdir[0],
		                         ast_expr_copy(lbs[0]),
		                         BinaryOperator(
		                           BOP_mul,
		                           Parenthesis(
		                             BinaryOperator(
		                               BOP_add,
		                               IdentName("dist_fiter_"),
		                               IdentName("fiter_")
		                             )
		                           ),
		                           ast_expr_copy(steps[0])
		                         )
		                       )
		                     )
		                   )
		                  ),
		        BinaryOperator(BOP_lt, IdentName(NORMALIZEDITER),
		                       IdentName("liter_")
		                      ),
		        ((collapsenum > 1) ?
		         PostOperator(IdentName(NORMALIZEDITER), UOP_inc) :
		         CommaList(
		           PostOperator(IdentName(NORMALIZEDITER), UOP_inc),
		           Assignment(Identifier(var),
		                      stepdir[0]==BOP_add ? ASS_add : ASS_sub,
		                      ast_expr_copy(steps[0]))
		         )
		        ),
		        (collapsenum > 1) ?
		          ( ord ? Compound(BlockList(BlockList(idx, ORTCURRITER), s->body)) :
		                  Compound(BlockList(idx, s->body)) ) :
		          ( ord ? Compound(BlockList(ORTCURRITER, s->body)) : s->body )
		       );

#undef ORTCURRITER
	}

	/* Schedule-dependent code */
	switch (schedtype)
	{
		case OC_static:
			if (static_chunk)
				/* Round-robin over chunks: thread k takes chunks k, k+TN_, ... */
				stmp = BlockList(
				         stmp,
				         For(
				           parse_blocklist_string("chid_ = omp_get_thread_num();"),
				           NULL,
				           parse_expression_string("chid_ += TN_"),
				           Compound(
				             BlockList(
				               parse_blocklist_string(
				                 "fiter_ = chid_*(%s);"
				                 "if (fiter_ >= niters_) break;"
				                 "liter_ = fiter_ + (%s);"
				                 "if (liter_ > niters_) liter_ = niters_;",
				                 chsize, chsize
				               ),
				               s
				             )
				           )
				         )
				       );
			else
				stmp = BlockList(
				         stmp,
				         If(
				           parse_expression_string(
				             "ort_get_static_default_chunk(niters_, &fiter_, &liter_)"),
				           Compound(s),
				           NULL
				         )
				       );
			break;

		case OC_guided:
		case OC_dynamic:
			stmp = BlockList(
			         stmp,
			         While(
			           parse_expression_string(
			             "ort_get_%s_chunk(niters_,%s,%s,&fiter_,&liter_,(int*)0)",
			             schedtype == OC_guided ? "guided" : "dynamic",
			             schedchunk ? chsize : "1",
			             (modifer == OCM_none || modifer == OCM_monotonic) ? "1":"0"),
			           Compound(s)
			         )
			       );
			break;

		case OC_runtime:
			stmp = Block3(
			         stmp,
			         /* ort_get_runtime_schedule_stuff(&get_chunk_, &chunksize_); */
			         FuncCallStmt(
			           IdentName("ort_get_runtime_schedule_stuff"),
			           CommaList(
			             UOAddress(IdentName("get_chunk_")),
			             UOAddress(IdentName("chunksize_"))
			           )
			         ),
			         While(
			           parse_expression_string(  /* Too big to do it by hand */
			             "(*get_chunk_)(niters_, chunksize_, %s, &fiter_, &liter_, "
			             "&staticextra_)",
			             (modifer == OCM_none || modifer == OCM_monotonic) ? "0":"1"),
			           Compound(s)
			         )
			       );
			break;

		case OC_affinity:
			stmp = BlockList(stmp, s);
			break;

		default:      /* No other schedule type is handled here */
			break;
	}

	*t = BlockList(v, stmp);

	/* Add a label that is used when canceling */
	*t = BlockList(*t, Labeled(Symbol(clabel), Expression(NULL)));
	if (!ispfor || ord || ordnum)   /* Still need it if ordered clause exists */
		*t = BlockList(*t, Call0_stmt("ort_leaving_for"));
	if (lasts)
	{
		if (collapsenum > 1)
		{
			aststmt idx;     /* Need to set explicitly the correct index values */

			/* Advance every collapsed index by one step before the lastprivate
			 * assignments take place.
			 */
			idx = Expression(
			        Assignment(Identifier(vars[0]),
			                   stepdir[0] == BOP_add ? ASS_add : ASS_sub,
			                   ast_expr_copy(steps[0]))
			      );
			for (i = 1; i < collapsenum; i++)
				idx = BlockList(
				        idx,
				        Expression(
				          Assignment(Identifier(vars[i]),
				                     stepdir[i] == BOP_add ? ASS_add : ASS_sub,
				                     ast_expr_copy(steps[i]))
				        )
				      );
			lasts = BlockList(idx, lasts);
		}

		/* if (iter_ && iter_ == niters_) { <lastprivate assignments> } */
		*t = BlockList(
		       *t,
		       If(
		         BinaryOperator(BOP_land,
		           IdentName(NORMALIZEDITER),
		           BinaryOperator(BOP_eqeq,
		             IdentName(NORMALIZEDITER),
		             IdentName("niters_")
		           )
		         ),
		         lasts->type == STATEMENTLIST ?  Compound(lasts) : lasts,
		         NULL
		       )
		     );
	}
	if (reds)
		*t = BlockList(*t, reds);
	if (needbarrier)
		*t = BlockList(*t, BarrierCall());
	else
		if (!nw)   /* We ditched the barrier; but should at least flush */
			*t = BlockList(*t, Call0_stmt("ort_fence"));
	if (redfree)
		*t = BlockList(*t, redfree);

	*t = Compound(*t);
	ast_stmt_parent(parent, *t);

	/* NOTE: when the chunk size is a constant, chsize points to strdup()ed
	 * memory which is never released (otherwise it points to a string
	 * literal, so it cannot be freed unconditionally).
	 */
}


/* Possible clauses:
 * private, firstprivate, lastprivate, collapse, dist_schedule.
 */
void xform_distribute(aststmt *t, int combined, aststmt for_body)
{
	aststmt   s = (*t)->u.omp->body, parent = (*t)->parent, v, realbody,
	          decls, inits = NULL, lasts = NULL, stmp, embdcls = NULL;
	astexpr   lb, ub, step, lbs[MAXLOOPS], ubs[MAXLOOPS], steps[MAXLOOPS], 
	          expr, elems;
	symbol    var, realvar, vars[MAXLOOPS];
	int       incrop, condop, stepdir[MAXLOOPS];
	int       i = 0, collapsenum = 1, nestnum;
	bool      haslast, hasboth, hasred;
	astexpr   dist_schedchunk = NULL;    /* the chunksize expression */
	char      *chsize = NULL,       /* the chunksize value or variable */
	          iters[MAXLOOPS][128],
	          plainstep,
	          plainsteps[MAXLOOPS];
	ompclause dist_sch = xc_ompcon_get_clause((*t)->u.omp, OCDISTSCHEDULE),
	          col = xc_ompcon_get_clause((*t)->u.omp, OCCOLLAPSE);
	symtab    dvars;
	stentry   varentry;

	v = ompdir_commented(OmpStmtDir(*t)); /* Put directive in comments */

	/*
	 * Preparations
	 */

	if (dist_sch)
	{
		assert(dist_sch->subtype == OC_static);  /* sanity */
		dist_schedchunk = dist_sch->u.expr;
		/* Optimize: if dist_schedchunk is a constant, don't use a variable for it */
		if (dist_schedchunk && dist_schedchunk->type == CONSTVAL)
			chsize = strdup(dist_schedchunk->u.str);    /* memory leak */
	}

	if (col)
	{
		if ((collapsenum = col->subtype) >= MAXLOOPS)
			exit_error(1, "(%s, line %d) ompi error:\n\t"
				"cannot collapse more than %d FOR loops.\n",
				OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l,MAXLOOPS);
	}

	/* Collect all data clause vars - we need to check if any vars
	 * are both firstprivate and lastprivate
	 */
	dvars = xc_validate_store_dataclause_vars(OmpStmtDir(*t));

	/* Analyze the loop(s) */
	nestnum = collapsenum;
	i = 0;
	do
	{
		analyze_omp_for(s, &var, &lb, &ub, &step, &condop, &incrop);

		/* First check if the loop variable has been enlisted; if not,
		 * it is automatically considered private (v25) - so we make it
		 * appear as if there was a private(var) clause.
		 */
		if ((varentry = symtab_get(dvars, var, IDNAME)) == NULL)
		{
			if (s->u.iteration.init->type == EXPRESSION)
				symtab_put(dvars, var, IDNAME)->ival = OCPRIVATE;
			else
				embdcls = (embdcls) ?
					BlockList(
						embdcls,
						Declaration( /* without the initializer */
							ast_spec_copy(s->u.iteration.init->u.declaration.spec),
							ast_decl_copy(s->u.iteration.init->u.declaration.decl->decl)
						)
					) :
					Declaration(
						ast_spec_copy(s->u.iteration.init->u.declaration.spec),
						ast_decl_copy(s->u.iteration.init->u.declaration.decl->decl)
					);
		}
		else
		{
			if (s->u.iteration.init->type != EXPRESSION)  /* a declaration */
				exit_error(1, "(%s, line %d) openmp error:\n\t"
					"iteration variable '%s' is declared within the FOR statement\n\t"
					"and thus it cannot appear in the directive's data clauses.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l, var->name);
			/* Remove the FIRSTPRIVATE attribute if any (there is no use for it) */
			/* Actually, v25 (p.64,l.23) specifies that the iteration variable
			 * can only appear in a PRIVATE or LASTPRIVATE clause, so we should
			 * emit at least a warning.
			 */
			if (varentry->ival == OCFIRSTPRIVATE || varentry->ival == OCFIRSTLASTPRIVATE)
				warning("(%s, line %d) warning:\n\t"
					"iteration variable '%s' cannot appear in a FIRSTPRIVATE clause..\n\t"
					"  .. let's pretend it was in a PRIVATE clause.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l, var->name);
			if (varentry->ival == OCFIRSTPRIVATE)
				varentry->ival = OCPRIVATE;
			else
				if (varentry->ival == OCFIRSTLASTPRIVATE)
					varentry->ival = OCLASTPRIVATE;
		}
		
		if (step == NULL || step->type == CONSTVAL)   /* ++/-- or += constant */
		{
			plainstep = (step == NULL) ? 1 : 2;
			step = (step == NULL) ? OneExpr() : ast_expr_copy(step);
		}
		else /* step != NULL && general expression for step */
		{
			step = Parenthesis(ast_expr_copy(step));   /* An expression */
			plainstep = 0;
		}

		vars[i] = var;
		lbs[i]  = Parenthesis(ast_expr_copy(lb));
		ubs[i]  = Parenthesis(
		            (condop == BOP_leq || condop == BOP_geq) ?  /* correct ub */
		            BinaryOperator((condop == BOP_leq) ? BOP_add : BOP_sub,
		                           Parenthesis(ast_expr_copy(ub)),
		                           OneExpr()) :
		            ast_expr_copy(ub)
		          );
		steps[i] = step;
		stepdir[i] = incrop;
		sprintf(iters[i], "dist_iters_%s_", var->name);
		plainsteps[i] = plainstep;      /* TODO: actually use this info */

		if (i == collapsenum-1)
		{
			realbody = s;                 /* Remember where the real body is */
			realvar = var;
		}
		if (i < nestnum - 1)
		{
			s = s->body;
			if (s != NULL && s->type == COMPOUND && s->body != NULL &&
			    s->body->type == ITERATION && s->body->subtype == SFOR)
				s = s->body;  /* { For } -> For */
			if (s == NULL || s->type != ITERATION || s->subtype != SFOR)
				exit_error(1, "(%s, line %d) openmp error:\n\t"
					"%d perfectly nested FOR loops were expected.\n",
					OmpStmtDir(*t)->file->name, OmpStmtDir(*t)->l,
					nestnum, nestnum);
		}
	}
	while ((++i) < nestnum);
	s = realbody;
	var = realvar;

	/* declarations from the collected vars (not the clauses!) */
	decls = xc_stored_vars_declarations(&haslast, &hasboth, &hasred);
	/* initialization statements for firstprivate non-scalar vars */
	if (decls)
		inits = xc_ompdir_fiparray_initializers(OmpStmtDir(*t));
	/* assignments for lastprivate vars */
	if (haslast)
		lasts = xc_ompdir_lastprivate_assignments(OmpStmtDir(*t));

	stmp  = (embdcls) ? embdcls : verbit(" ");
	stmp = BlockList(
	         stmp,  /* Initialize because if a thread gets no iterations, the */
	         Declaration(  /* lastprivate check for iter==niters may succeed! */
	           ITERCNT_SPECS,  /*  <specs> dist_niters_=0,iter_=0,dist_fiter_,dist_liter_=0; */
	           DeclList(
	             DeclList(
	               DeclList(
	                 InitDecl(
	                   Declarator(NULL, IdentifierDecl(Symbol("dist_niters_"))),
	                   ZeroExpr()
	                 ),
	                 InitDecl(
	                   Declarator(NULL, IdentifierDecl(Symbol(NORMALIZEDITER_DIST))),
	                   ZeroExpr()
	                 )
	               ),
	               Declarator(NULL, IdentifierDecl(Symbol("dist_fiter_")))
	             ),
	             InitDecl(
	               Declarator(NULL, IdentifierDecl(Symbol("dist_liter_"))),
	               ZeroExpr()
	             )
	           )
	         )
	       );

	
	if (collapsenum > 1)  /* We need vars for # iterations */
	{
		if (collapsenum > 1)
			stmp = BlockList(
			         stmp,
			         Declaration(ITERCNT_SPECS,
			                     InitDecl(
			                       Declarator(NULL, IdentifierDecl(Symbol("pp_"))),
			                       OneExpr()
			                     ))
			       );
		for (i = 0; i < nestnum; i++)
			stmp = BlockList(
			         stmp,
			         Declaration(
			           ITERCNT_SPECS,
			           InitDecl(
			             Declarator(NULL, IdentifierDecl(Symbol(iters[i]))),
		               specs2iters(lbs[i],ubs[i],steps[i],stepdir[i],plainsteps[i])
			           )
			         )
			       );
	}
	if (chsize == NULL)
	{
		chsize = "chunksize_";
		if (dist_schedchunk)     /* expr for chunk size */
		{
			stmp = BlockList(
			         stmp,
			         Declaration(
			           ITERCNT_SPECS,
			           InitDecl(
			             Declarator(NULL, IdentifierDecl(Symbol(chsize))),
			             ast_expr_copy(dist_schedchunk)
			           )
			         )
			       );
		}
	}
	/* we may need 2 more variables (int) */
	if (dist_schedchunk)
		stmp = BlockList(
		         stmp,
		         Declaration( /* declare: int dist_chid_, dist_TN_=omp_get_num_teams(); */
		           Declspec(SPEC_int),
		           DeclList(
		             Declarator(NULL, IdentifierDecl(Symbol("dist_chid_"))),
		             InitDecl(
		               Declarator(NULL, IdentifierDecl(Symbol("dist_TN_"))),
		               Call0_expr("omp_get_num_teams")
		             )
		           )
		         )
		       );

	decls = (decls) ? BlockList(decls, stmp) : stmp;

	/*
	 * Do the job
	 */

	(*t)->u.omp->body = NULL;     /* Make it NULL so as to free it easily */
	ast_free(*t);                 /* Get rid of the OmpStmt */

	if (inits)
		decls = BlockList(decls, inits);  /* Append the initialization statements */

	/* Append our new code: dist_niters_ = ...;  */
	if (collapsenum == 1)
		elems = CastLong( 
		          specs2iters(lbs[0], ubs[0], steps[0], stepdir[0], plainsteps[0]) 
		        );
	else
		for (elems = IdentName(iters[0]), i = 1; i < collapsenum; i++)
			elems = BinaryOperator(BOP_mul, elems, IdentName(iters[i]));
	expr = elems;

	// stmp = Expression(      /* ort_entering_for(nw,ord); */
	//          FunctionCall(
	//            IdentName("ort_entering_for"),
	//            Comma2(OneExpr(), ZeroExpr())
	//          )
	//        );

	stmp =  Expression(     /* dist_niters_ = ... */
	           Assignment(IdentName("dist_niters_"), ASS_eq, expr)
	         );

	stmp = BlockList(decls, stmp);
	if (hasboth)   /* a var is both fip & lap; this needs a barrier here :-( */
		stmp = BlockList(stmp, BarrierCall());

	/* Prepare the main loop */
	{
		aststmt idx = NULL;

		if (collapsenum > 1)
		{
			idx = AssignStmt(IdentName("pp_"), OneExpr());
			for (i = collapsenum - 1; i >= 0; i--)
			{
				idx = BlockList(
				        idx,
				        AssignStmt(
				          Identifier(vars[i]),
				          BinaryOperator(
				            stepdir[i], //BOP_add,
				            ast_expr_copy(lbs[i]),
				            BinaryOperator(
				              BOP_mul,
				              ast_expr_copy(steps[i]),
				              Parenthesis(
				                BinaryOperator(
				                  BOP_mod,
				                  Parenthesis(
				                    BinaryOperator(
				                      BOP_div,
				                      IdentName(NORMALIZEDITER_DIST),
				                      IdentName("pp_")
				                    )
				                  ),
				                  IdentName(iters[i])
				                )
				              )
				            )
				          )
				        )
				      );
				if (i != 0)
					idx = BlockList(
					        idx,
					        Expression(Assignment(IdentName("pp_"), ASS_mul,
					                              IdentName(iters[i]))
					                  )
					      );
			}
		}
		/* Loop becomes:
		 *   for (iter = fiter; iter < liter; iter++) {
		 *     <var> = lb +/- iter*step
		 *     <body>
		 *   }
		 * optimized as:
		 *   for (iter = fiter, var = ...; iter < liter; iter++, var +/-= step) {
		 *     <body>
		 *   }
		 */

#define ORTCURRITER \
     Expression(FunctionCall(IdentName("ort_for_curriter"), \
     IdentName(NORMALIZEDITER_DIST)))

		if (!combined) {
			s = For(Expression((collapsenum > 1) ?
		                   Assignment(IdentName(NORMALIZEDITER_DIST),
		                              ASS_eq,
		                              IdentName("dist_fiter_"))
		                   :
		                   CommaList(
		                     Assignment(IdentName(NORMALIZEDITER_DIST),
		                                ASS_eq,
		                                IdentName("dist_fiter_")),
		                     Assignment(Identifier(var),
		                                ASS_eq,
		                                BinaryOperator(
		                                  stepdir[0], 
		                                  ast_expr_copy(lbs[0]),
		                                  BinaryOperator(BOP_mul,
		                                    IdentName("dist_fiter_"),
		                                    ast_expr_copy(steps[0]))
		                                  )
		                               )
		                   )
		                  ),
		        BinaryOperator(BOP_lt, IdentName(NORMALIZEDITER_DIST),
		                       IdentName("dist_liter_")
		                      ),
		        ((collapsenum > 1) ?
		         PostOperator(IdentName(NORMALIZEDITER_DIST), UOP_inc) :
		         CommaList(
		           PostOperator(IdentName(NORMALIZEDITER_DIST), UOP_inc),
		           Assignment(Identifier(var), 
		                      stepdir[0]==BOP_add ? ASS_add : ASS_sub,
		                      ast_expr_copy(steps[0]))
		         )
		        ),
		        (collapsenum > 1) ? 
		          Compound(BlockList(idx, s->body)) : s->body
		       );
		}
		else {
			s = for_body;
		}

#undef ORTCURRITER
	}

	/* Schedule-dependent code */
	if (dist_schedchunk)
		stmp = BlockList(
		         stmp,
		         For(
		           parse_blocklist_string("dist_chid_ = omp_get_team_num();"),
		           NULL,
		           parse_expression_string("dist_chid_ += dist_TN_"),
		           Compound(
		             BlockList(
		               parse_blocklist_string(
		                 "dist_fiter_ = dist_chid_*(%s);"
		                 "if (dist_fiter_ >= dist_niters_) break;"
		                 "dist_liter_ = dist_fiter_ + (%s);"
		                 "if (dist_liter_ > dist_niters_) dist_liter_ = dist_niters_;",
		                 chsize, chsize
		               ),
		               s
		             )
		           )
		         )
		       );
	else
		stmp = BlockList(
		         stmp,
		         If(
		           parse_expression_string(
		             "ort_get_distribute_chunk(dist_niters_, &dist_fiter_, &dist_liter_)"),
		           Compound(s),
		           NULL
		         )
		       );

	*t = BlockList(v, stmp);

	// *t = BlockList(*t, Call0_stmt("ort_leaving_for"));
	if (lasts)
	{
		if (collapsenum > 1)
		{
			aststmt idx;     /* Need to set explicitly the correct index values */

			idx = Expression(
			        Assignment(Identifier(vars[0]), stepdir[0] == BOP_add ? ASS_add : ASS_sub, ast_expr_copy(steps[0]))
			      );
			for (i = 1; i < collapsenum; i++)
				idx = BlockList(
				        idx,
				        Expression(
				          Assignment(Identifier(vars[i]), stepdir[i] == BOP_add ? ASS_add : ASS_sub, ast_expr_copy(steps[i]))
				        )
				      );
			lasts = BlockList(idx, lasts);
		}

		*t = BlockList(
		       *t,
		       If(
		         BinaryOperator(BOP_land,
		           IdentName(NORMALIZEDITER_DIST),
		           BinaryOperator(BOP_eqeq,
		             IdentName(NORMALIZEDITER_DIST),
		             IdentName("dist_niters_")
		           )
		         ),
		         lasts->type == STATEMENTLIST ?  Compound(lasts) : lasts,
		         NULL
		       )
		     );
	}

	*t = Compound(*t);
	ast_stmt_parent(parent, *t);

	/* Must free the array
	 */
}


/**
 * Handle a "distribute parallel for" construct.
 *
 * When @p combined is zero, a "not supported yet" message is written to
 * stderr and the function returns without touching *t.
 *
 * Otherwise a copy of the construct is built that carries its own copy of
 * the clauses but shares the body of the original. The copy is run through
 * _xform_for_fordist() and the `body` field of the statement it produces is
 * given to xform_distribute() as its third argument.
 *
 * @param t        address of the statement holding the construct
 * @param combined zero selects the unsupported path; any other value is
 *                 forwarded unchanged to xform_distribute()
 */
void xform_distparfor(aststmt *t, int combined)
{
	aststmt forcopy, loopbody;

	if (combined == 0)
	{
		/* A non-combined construct should go through ort_execute_parallel
		 * etc.; for now it is only reported and left as is.
		 */
		fprintf(stderr,
		        "#pragma omp distribute parallel for: not supported yet...\n");
		return;
	}

	/* Clone the construct, give the clone its own clause copy, and make it
	 * point at the original body (the body itself is shared, not copied).
	 */
	forcopy = ast_stmt_copy(*t);
	OmpStmtDir(forcopy)->clauses = ast_ompclause_copy(OmpStmtDir(*t)->clauses);
	loopbody = (*t)->u.omp->body;
	forcopy->u.omp->body = loopbody;

	/* Overwrite the body's file/l/c fields with those of OmpStmtDir(*t)
	 * (presumably source file, line and column -- confirm in the AST header).
	 */
	loopbody->file = OmpStmtDir(*t)->file;
	loopbody->l    = OmpStmtDir(*t)->l;
	loopbody->c    = OmpStmtDir(*t)->c;

	/* Transform the clone as a for-loop, then hand the result over */
	ast_stmt_parent((*t)->parent, forcopy);
	_xform_for_fordist(&forcopy);
	xform_distribute(t, combined, forcopy->body);
}


/**
 * Placeholder for the "distribute simd" construct: no transformation is
 * performed; a "not supported yet" message is written to stderr.
 *
 * @param t the statement holding the construct (not used)
 */
void xform_distsimd(aststmt *t)
{
	(void) t;   /* intentionally unused */
	fputs("#pragma omp distribute simd: not supported yet...\n", stderr);
}

/**
 * Placeholder for the "distribute parallel for simd" construct: no
 * transformation is performed; a "not supported yet" message is written
 * to stderr.
 *
 * @param t the statement holding the construct (not used)
 */
void xform_distparforsimd(aststmt *t)
{
	static const char *const notice =
	  "#pragma omp distribute parallel for simd: not supported yet...\n";

	(void) t;   /* intentionally unused */
	fputs(notice, stderr);
}
