#include <stdlib.h>

#include "mpi.h"

/*
 * my_reduce - reduce `count` elements of `datatype` from every process in
 * `comm` onto process `root`, combining them with `op`.
 *
 * The vector is cut into nprocs equal blocks of count/nprocs elements
 * (offsets[i] = i * (count/nprocs)).  Those blocks are handed to
 * my_reduce_scatter_x() and the per-process result in a scratch buffer is
 * then passed to my_gather() with `recv_buf` as the destination on `root`.
 * Any trailing count % nprocs elements, which the equal-block split cannot
 * cover, are reduced separately with MPI_Reduce so that no element of the
 * input is silently dropped.
 *
 * Parameters:
 *   send_buf  - this process's contribution (count elements).
 *   recv_buf  - destination of the reduced vector; used for the tail
 *               reduction only on `root`.
 *   count     - number of elements contributed by each process.
 *   datatype  - MPI datatype of the elements.
 *   op        - MPI reduction operation.
 *   root      - rank that receives the result.
 *   comm      - communicator over which the reduction is performed.
 *
 * The function returns nothing, so an allocation failure is reported by
 * calling MPI_Abort( comm, 1 ).
 *
 * NOTE(review): my_reduce_scatter_x() and my_gather() are not declared in
 * this part of the file; their exact contracts (in particular where in the
 * scratch buffer the local result is placed) should be confirmed against
 * their definitions.
 */
void my_reduce( void * send_buf, void * recv_buf, int count, MPI_Datatype datatype,
	        MPI_Op op, int root, MPI_Comm comm)
{
  int
    me, i, nprocs, typesize, block, tail;
  int
    *offsets;
  void
    *scratch = NULL,   /* the real allocation, if any; always safe to free */
    *temp_buf;         /* what is handed to the helpers                     */

  MPI_Comm_rank( comm, &me );
  MPI_Comm_size( comm, &nprocs );

  MPI_Type_size( datatype, &typesize );

  block = count / nprocs;
  tail  = count % nprocs;

  if ( count > 0 && typesize > 0 ) {
    /* Multiply in size_t so a large count * typesize cannot overflow int. */
    scratch = malloc( (size_t) count * (size_t) typesize );
    if ( scratch == NULL ) {
      MPI_Abort( comm, 1 );
      return;
    }
    temp_buf = scratch;
  }
  else {
    /* Nothing to allocate: keep the original non-NULL placeholder so the
       helpers are never handed a NULL buffer.  It is never dereferenced
       or freed here. */
    temp_buf = ( void * ) 1;
  }

  offsets = malloc( ( (size_t) nprocs + 1 ) * sizeof *offsets );
  if ( offsets == NULL ) {
    free( scratch );
    MPI_Abort( comm, 1 );
    return;
  }

  /* Equal-sized blocks: offsets[ i ] is the first element of block i. */
  offsets[ 0 ] = 0;
  for ( i=0; i<nprocs; i++ )
    offsets[ i+1 ] = offsets[ i ] + block;

  my_reduce_scatter_x( send_buf, temp_buf, offsets, datatype, op, comm );

  my_gather( temp_buf, block, datatype, recv_buf, block, datatype, root, comm);

  if ( tail > 0 ) {
    /* count is not a multiple of nprocs: the last `tail` elements lie past
       offsets[ nprocs ] and were not touched above.  Reduce them directly.
       Element addresses are computed with the datatype extent, which is
       the stride between consecutive elements in a buffer. */
    MPI_Aint lb, extent, skip;
    char *tail_send, *tail_recv;

    MPI_Type_get_extent( datatype, &lb, &extent );
    skip = (MPI_Aint) ( count - tail ) * extent;

    tail_send = (char *) send_buf + skip;
    /* The receive buffer is only significant on root. */
    tail_recv = ( me == root ) ? (char *) recv_buf + skip : NULL;

    MPI_Reduce( tail_send, tail_recv, tail, datatype, op, root, comm );
  }

  free( offsets );
  free( scratch );

  return;
}
