#include "mpi.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hand-written prototype for sqrt(); no <math.h> is included by this file.
   NOTE(review): nothing in the visible part of the file calls sqrt() --
   possibly a leftover; confirm before removing. */
double sqrt( double );

/* Per-stage exchange routine, defined outside this view.  my_allgather3
   calls it once per factor of the communicator size and passes, in order:
   the table of per-rank block pointers into the receive buffer, a base
   rank index, the current factor, the current stride, the caller's rank,
   and the communicator.
   NOTE(review): parameter meanings are inferred from that call site only;
   confirm against the definition. */
void my_allgather2_x( char **, int , int, int, int, MPI_Comm );


/*
 * my_allgather3 -- allgather built from one exchange stage per prime
 * factor of the communicator size.
 *
 * Every rank contributes one block of recv_size elements; on return
 * recv_buf is expected to hold the blocks of all ranks, ordered by rank.
 * The actual data movement between ranks is delegated to
 * my_allgather2_x(), which is called once for every prime factor of the
 * number of ranks (smallest factors first).
 *
 * Parameters:
 *   send_buf       block contributed by the calling rank
 *   send_size      accepted for interface compatibility but NOT consulted;
 *                  recv_size elements are read from send_buf
 *   send_datatype  element type; must be the same handle as recv_datatype
 *   recv_buf       destination, room for (ranks in comm) * recv_size
 *                  elements
 *   recv_size      number of elements in each rank's block
 *   recv_datatype  element type of recv_buf
 *   comm           communicator to gather over
 *
 * Errors: if the two datatypes differ, or the internal pointer table
 * cannot be allocated, a message is written to stderr and the job is
 * terminated with MPI_Abort().
 */
void my_allgather3( void *send_buf, int send_size, MPI_Datatype send_datatype,
                    void *recv_buf, int recv_size, MPI_Datatype recv_datatype,
                    MPI_Comm comm )
{
  int
    me, nprocs, i, typesize, nprocs_cur,
    stride, nprocs_included, nprocs_included_next, nprocs_left;

  size_t
    block_bytes;

  char
    **recv_location;

  /* send_size is intentionally unused: the block length is taken from
     recv_size (see the header comment). */
  (void) send_size;

  if ( send_datatype != recv_datatype ){
    fprintf( stderr, " send_datatype != recv_datatype not yet implemented\n" );
    /* Terminate the whole job with a failure status; exit( 0 ) would
       report success and skip MPI shutdown. */
    MPI_Abort( comm, 1 );
    exit( EXIT_FAILURE );   /* only reached if MPI_Abort returns */
  }

  MPI_Comm_rank( comm, &me );
  MPI_Comm_size( comm, &nprocs );

  MPI_Type_size( send_datatype, &typesize );

  /* Size of one rank's block in bytes, computed in size_t so that large
     blocks do not overflow int arithmetic. */
  block_bytes = ( size_t ) recv_size * ( size_t ) typesize;

  /* recv_location[ r ] points at the slot for rank r's block inside
     recv_buf; the extra last entry is the one-past-the-end address. */
  recv_location = malloc( ( ( size_t ) nprocs + 1 ) * sizeof *recv_location );
  if ( recv_location == NULL ){
    fprintf( stderr, " my_allgather3: out of memory\n" );
    MPI_Abort( comm, 1 );
    exit( EXIT_FAILURE );   /* only reached if MPI_Abort returns */
  }

  recv_location[ 0 ] = ( char * ) recv_buf;

  for ( i = 0; i < nprocs; i++ )
    recv_location[ i+1 ] = recv_location[ i ] + block_bytes;

  /* Place the local contribution into its own slot. */
  memcpy( recv_location[ me ], send_buf, block_bytes );

  /* stride and nprocs_included advance in lockstep: both are the product
     of the factors processed so far. */
  stride = 1;
  nprocs_cur = 2;
  nprocs_left = nprocs;
  nprocs_included = 1;
  while ( nprocs_left > 1 ){
    /* determine next factor: smallest divisor of what is left, searching
       upward from the previous factor */
    while ( nprocs_left % nprocs_cur != 0 )
      nprocs_cur++;

    nprocs_included_next = nprocs_included * nprocs_cur;

    /* Base index: start of this rank's group of nprocs_included_next
       consecutive ranks, plus this rank's offset within the span already
       gathered (me % nprocs_included). */
    my_allgather2_x( recv_location,
                     ( me / nprocs_included_next ) * nprocs_included_next +
                     me % nprocs_included, nprocs_cur, stride, me, comm );

    nprocs_included = nprocs_included_next;
    stride *= nprocs_cur;
    nprocs_left /= nprocs_cur;
  }

  free ( recv_location );

  return;
}



