#include "mpi.h"

#include <stdio.h>
#include <stdlib.h>

/*
 * new_mpi_allreduce - all-reduce implemented as MPI_Reduce_scatter followed
 * by MPI_Allgatherv.
 *
 *   send_buf  : this rank's contribution, `count` elements of `datatype`
 *   recv_buf  : receives the fully reduced vector, `count` elements
 *   count     : number of elements per rank (as with any collective, every
 *               rank in `comm` is expected to pass the same value)
 *   datatype  : element type
 *   op        : reduction operation
 *   comm      : communicator over which to reduce
 *
 * The vector is split into nprocs contiguous pieces.  When `count` is not a
 * multiple of nprocs, the first (count % nprocs) ranks each own one extra
 * element, so every element is reduced and gathered -- including the case
 * count < nprocs, where some ranks own an empty piece.
 *
 * If scratch memory cannot be obtained the job is aborted with MPI_Abort:
 * a single rank returning early would leave the other ranks blocked inside
 * the collective calls.
 *
 * NOTE: the scratch buffer is sized with MPI_Type_size, which matches the
 * memory footprint only for datatypes without gaps or padding.
 * NOTE: MPI_IN_PLACE is not given any special treatment for send_buf.
 */
void new_mpi_allreduce( void * send_buf, void * recv_buf, int count, MPI_Datatype datatype,
		        MPI_Op op, MPI_Comm comm)
{
  int
    me, i, nprocs, typesize, base, extra, offset;
  int
    *recv_count, *displs;
  void
    *temp_buf;
  size_t
    temp_bytes;

  MPI_Comm_rank( comm, &me );
  MPI_Comm_size( comm, &nprocs );

  MPI_Type_size( datatype, &typesize );

  base  = count / nprocs;   /* elements every rank owns              */
  extra = count % nprocs;   /* ranks 0..extra-1 own one more element */

  /* One allocation holds both the per-rank counts and the displacements. */
  recv_count = (int *) malloc( 2 * (size_t) nprocs * sizeof( int ) );
  if ( recv_count == NULL )
    {
      fprintf( stderr, "malloc failed\n" );
      MPI_Abort( comm, 1 );
      return;
    }
  displs = recv_count + nprocs;

  offset = 0;
  for ( i = 0; i < nprocs; i++ )
    {
      recv_count[i] = base + ( i < extra ? 1 : 0 );
      displs[i]     = offset;
      offset       += recv_count[i];
    }

  /* Size the scratch buffer in size_t to avoid int overflow; always request
     at least one byte so a rank owning an empty piece still gets a valid,
     freeable pointer. */
  temp_bytes = 0;
  if ( recv_count[me] > 0 && typesize > 0 )
    temp_bytes = (size_t) recv_count[me] * (size_t) typesize;
  if ( temp_bytes == 0 )
    temp_bytes = 1;

  temp_buf = malloc( temp_bytes );
  if ( temp_buf == NULL )
    {
      fprintf( stderr, "malloc failed\n" );
      free( recv_count );
      MPI_Abort( comm, 1 );
      return;
    }

  /* Each rank ends up with the reduced values of its own piece ... */
  MPI_Reduce_scatter( send_buf, temp_buf, recv_count, datatype, op, comm);

  /* ... and the pieces are then assembled into recv_buf on every rank. */
  MPI_Allgatherv( temp_buf, recv_count[me], datatype,
		  recv_buf, recv_count, displs, datatype, comm);

  free( temp_buf );
  free( recv_count );

  return;
}
