#include "Bdef.h"

/*
 *  The bidirectional exchange topology (BE) is specialized for dealing with
 *  case where all nodes participating in the operation need to
 *  receive the answer.  It works best when # of nodes is some even
 *  power of two.  This topology is based on an algorithm presented by
 *  Robert van de Geijn, et al.
 */
void BE_comb(ctxt, scope, MatInf, buff, Xmvpk, Xmvupk, Xmvupk_op)
BLACSCONTEXT  *ctxt;
char  scope;
MATINFO  *MatInf;
char  *buff;
MVPK  Xmvpk;
MVPK  Xmvupk;
MVUPKOP  Xmvupk_op;
/*
 *  -- V1.0 BLACS routine --
 *  University of Tennessee, February 28, 1995
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Perform a element-by-element combine on vectors.
 *  The answer will be left on all participating processes.  Since this method
 *  uses a hypercube communication pattern, the number of nodes participating
 *  in the operation must be a power of 2 for it to perform efficiently.
 *
 *  Arguments
 *  =========
 *  CTXT    (input) pointer to BLACSCONTEXT
 *          The BLACS context where operation is taking place.
 *
 *  SCOPE   (input) char
 *          Limit the scope of the operation.
 *          = 'r' :   Operation is performed by a process row.
 *          = 'c' :   Operation is performed by a process column.
 *          = 'a' :   Operation is performed by all processes in grid.
 *
 *  MATINF  (input) ptr to MATINFO (structure)
 *          This structure contains information about the matrices to be
 *          operated on.  The number of matrices, and what each matrix is
 *          depends on the operation being performed.
 *
 *  BUFF    (workspace) Pointer to char
 *          This space used to hold a column of data while it is being
 *          operated on.
 *
 *  Xmvpk   (input) pointer to packing function
 *          Points to the packing function appropriate for a given operation.
 *
 *  Xmvupk  (input) pointer to unpacking function
 *          Points to the unpacking function appropriate for a given operation.
 *
 * Xmvupk_op(input) pointer to operation/unpacking function
 *          Points to a function which unpacks the columns of the received
 *          matrix, and stores the result of the operation in the output
 *          matrix (or matrices) pointed to by MATINF.
 *
 * ------------------------------------------------------------------------
 */
{
   void Ssend2d00();
   void Srecv2d00();
   char *getbuff();

   int msgid, np2msgid, nnodes, rdest, cdest, mydist, np2, bit;

   switch (scope)
   {
   case 'r':
      nnodes = ctxt->npcol;
      mydist = ctxt->mycol;
      break;
   case 'c':
      nnodes = ctxt->nprow;
      mydist = ctxt->myrow;
      break;
   case 'a':
      nnodes = ctxt->Ng;
      mydist = ctxt->vIam;
      break;
   default :
      return;
   }
   if (nnodes < 2) return;
   rdest = ctxt->myrow;
   cdest = ctxt->mycol;


   for (np2=4; np2 < nnodes; np2 <<= 1);
   if (np2 > nnodes) np2 >>= 1;

   if (np2 != nnodes)
   {
      switch(scope)
      {
      case 'r':
         np2msgid = Mrid(ctxt);
         cdest = mydist ^ np2;
         break;
      case 'c':
         np2msgid = Mcid(ctxt);
         rdest = mydist ^ np2;
         break;
      case 'a':
         np2msgid = Maid(ctxt);
         Mvpcoord(ctxt, ctxt->vIam^np2, rdest, cdest);
         break;
      }
      if (mydist >= np2)		/* I am node beyond power of 2 */
      {
         Xmvpk(MatInf);
         Ssend2d00(ctxt, rdest, cdest, np2msgid);
/*
 *       Update my message ID's to match those who participate in BE
 */
         for (bit=1; (bit ^ np2); bit <<= 1)
	 {
	    switch(scope)
	    {
	    case 'r':
	       Mrid(ctxt);
	       break;
            case 'c':
	       Mcid(ctxt);
	       break;
            case 'a':
	       Maid(ctxt);
	       break;
	    }
	 }
         Srecv2d00(ctxt, np2msgid);
         Xmvupk(MatInf);
      }
      else if (mydist < (nnodes^np2))  /* need to fan in contents of */
      {                                /* non-power of 2 nodes */
         Srecv2d00(ctxt, np2msgid);
         Xmvupk_op(MatInf, buff);
      }
   }

   if (mydist < np2)
   {
      for(bit=1; (bit^np2); bit <<= 1)
      {
         switch(scope)
         {
         case 'r':
            msgid = Mrid(ctxt);
            cdest = mydist ^ bit;
            break;
         case 'c':
            msgid = Mcid(ctxt);
            rdest = mydist ^ bit;
            break;
         case 'a':
            msgid = Maid(ctxt);
            Mvpcoord(ctxt, ctxt->vIam ^ bit, rdest, cdest);
            break;
         }
         Xmvpk(MatInf);
         Ssend2d00(ctxt, rdest, cdest, msgid);
         Srecv2d00(ctxt, msgid);
         Xmvupk_op(MatInf, buff);
      }  /* end for */
/*
 *  For nodes that are not part of the hypercube proper, we must
 *  send data back.
 */
      if (mydist < (nnodes^np2))
      {
         switch(scope)
         {
         case 'r':
            cdest = mydist ^ np2;
            break;
         case 'c':
            rdest = mydist ^ np2;
            break;
         case 'a':
            Mvpcoord(ctxt, mydist ^ np2, rdest, cdest);
            break;
         }
         Xmvpk(MatInf);
         Ssend2d00(ctxt, rdest, cdest, np2msgid);
      }
   }  /* end if (nodes inside power of 2) */
}
