#include <stdlib.h>

#include "mpi.h"

/* Helpers defined in other translation units. */
void my_scatter_x( char **, int, int, int, int, MPI_Comm );
void my_allgather_x( char **, int, int, MPI_Comm );

/* my_bcast2 calls my_allgather2_x, which previously had no declaration in
   this file and was therefore implicitly declared.  The parameter types
   below match the arguments passed at the call sites.
   NOTE(review): the void return type is assumed by analogy with the
   helpers above - confirm against the definition. */
void my_allgather2_x( char **, int, int, int, int, MPI_Comm );

/* Declared by hand because this file does not include <math.h>. */
double sqrt( double );

/*
 * my_bcast2 - collective on `comm` implemented as one my_scatter_x call
 * followed by two my_allgather2_x calls.
 *
 * NOTE(review): judging by the name and call sequence this is presumably a
 * broadcast from `root` built as scatter + two-stage allgather; the helper
 * semantics are not visible in this file - confirm against their definitions.
 *
 * Parameters:
 *   send_buf      - start of the buffer; used as the base address of chunk 0.
 *   send_size     - number of elements of send_datatype in the buffer
 *                   (assumed non-negative - TODO confirm with callers).
 *   send_datatype - element datatype; only its size in bytes is used here.
 *   root          - rank forwarded unchanged to my_scatter_x.
 *   comm          - communicator whose rank/size drive the partitioning.
 *
 * The buffer is partitioned into nprocs contiguous chunks.  Every chunk holds
 * send_size / nprocs elements, and the first send_size % nprocs chunks hold
 * one extra element.  send_location[i] is the byte address where chunk i
 * starts; send_location[nprocs] is one past the end of the last chunk.
 *
 * If the boundary table cannot be allocated the job is terminated with
 * MPI_Abort, since a void function has no way to report the failure.
 */
void my_bcast2( void *send_buf, int send_size, MPI_Datatype send_datatype,
               int root, MPI_Comm comm )
{
  int
    me, nprocs, i, typesize, recv_size, remainder, nprocs1;
  char
    **send_location;

  MPI_Comm_rank( comm, &me );
  MPI_Comm_size( comm, &nprocs );

  MPI_Type_size( send_datatype, &typesize );

  /* nprocs + 1 entries: one start address per chunk plus the end sentinel. */
  send_location = malloc( ( (size_t) nprocs + 1 ) * sizeof *send_location );
  if ( send_location == NULL ) {
    MPI_Abort( comm, 1 );
    return;
  }

  send_location[ 0 ] = ( char * ) send_buf;

  recv_size = send_size / nprocs;
  remainder = send_size % nprocs;

  for ( i = 0; i < nprocs; i++ ){
    /* Byte offsets are computed in size_t so that large payloads do not
       overflow int arithmetic. */
    size_t chunk_elems = (size_t) recv_size + ( i < remainder ? 1u : 0u );

    send_location[ i+1 ] = send_location[ i ] + chunk_elems * (size_t) typesize;
  }

  my_scatter_x( send_location, me, root, 0, nprocs-1, comm );

  /* nprocs1 = largest divisor of nprocs that does not exceed sqrt(nprocs).
     The search always terminates because 1 divides nprocs. */
  nprocs1 = (int) sqrt( (double) nprocs );

  while ( nprocs % nprocs1 != 0 ) nprocs1--;

  /* First call: base (me/nprocs1)*nprocs1, count nprocs1, step 1.
     Second call: base me%nprocs1, count nprocs/nprocs1, step nprocs1.
     NOTE(review): presumably these select a block of consecutive ranks and
     then ranks strided by nprocs1 - confirm against my_allgather2_x. */
  my_allgather2_x( send_location, (me/nprocs1)*nprocs1, nprocs1, 1, me, comm );
  my_allgather2_x( send_location, me%nprocs1, nprocs/nprocs1, nprocs1,
                   me, comm );

  free ( send_location );

  return;
}

