#include <stdlib.h>

#include "mpi.h"

/*
 * BF( x ): yields x unchanged when it is non-null, otherwise the non-null
 * dummy address ( int * ) 1.  The dummy address must never be dereferenced.
 * NOTE: x is evaluated twice, so the argument must not have side effects.
 * NOTE(review): presumably used so the collective helpers never receive a
 * null pointer for an empty buffer -- confirm against the helpers.
 */
#define BF( x ) ( ( x ) != 0 ? ( x ) : ( int * ) 1 )

/*
 * All-reduce implemented as a reduce-scatter followed by an all-gather.
 *
 * The input is treated as nprocs equal chunks of count/nprocs elements
 * (nprocs being the size of comm).  my_reduce_scatter_x() is given the chunk
 * offsets and a temporary buffer sized for one chunk; my_allgather() is then
 * called with that temporary buffer and recv_buf, count/nprocs elements per
 * process.
 *
 * Parameters:
 *   send_buf  - input buffer passed to the reduce-scatter step
 *   recv_buf  - output buffer passed to the all-gather step
 *   count     - number of elements of type datatype
 *   datatype  - MPI datatype of the elements
 *   op        - MPI reduction operation
 *   comm      - communicator over which the operation runs
 *
 * NOTE: the chunk size is count/nprocs using integer division, so when count
 * is not a multiple of nprocs the trailing count % nprocs elements are not
 * covered by the offsets and chunk sizes handed to the helpers.
 *
 * Errors: a negative count or a failed allocation aborts the job through
 * MPI_Abort(), because the function has no way to report failure to its
 * caller.
 */
void my_allreduce( void * send_buf, void * recv_buf, int count, MPI_Datatype datatype,
		        MPI_Op op, MPI_Comm comm)
{
  int
    i, nprocs, typesize, chunk;
  int
    *offsets, *temp_buf;
  size_t
    chunk_bytes;

  MPI_Comm_size( comm, &nprocs );
  MPI_Type_size( datatype, &typesize );

  if ( count < 0 ) {
    MPI_Abort( comm, MPI_ERR_COUNT );
    return;
  }

  chunk = count / nprocs;

  /* Widen before multiplying so a large chunk cannot overflow int. */
  chunk_bytes = ( size_t ) chunk * ( size_t ) typesize;

  temp_buf = malloc( chunk_bytes );
  offsets = malloc( ( ( size_t ) nprocs + 1 ) * sizeof *offsets );

  /* malloc( 0 ) may legitimately return NULL, so a null temp_buf is only a
     failure when a non-empty chunk was requested. */
  if ( offsets == NULL || ( temp_buf == NULL && chunk_bytes > 0 ) ) {
    free( offsets );
    free( temp_buf );
    MPI_Abort( comm, MPI_ERR_NO_MEM );
    return;
  }

  /* offsets[ i ] is the element index at which chunk i starts;
     offsets[ nprocs ] marks the end of the last chunk. */
  offsets[ 0 ] = 0;
  for ( i=0; i<nprocs; i++ )
    offsets[ i+1 ] = offsets[ i ] + chunk;

  my_reduce_scatter_x( BF( send_buf ), BF( temp_buf ), offsets, datatype, op, comm );

  my_allgather( BF( temp_buf ), chunk, datatype, BF( recv_buf ), chunk, datatype, comm);

  free( offsets );
  free( temp_buf );
}
