/*
 * Homework Assignment 2
 * CS 378h    Unique Number: 53455
 * Spring, 2026
 * Given: February 10, 2026
 * Due:   February 19, 2026
 *
 * This homework assignment concerns transposing the elements of a square
 * matrix.  Below is some C-language code that contains a straightforward
 * implementation of code to perform a square-matrix transpose.  This
 * homework requires adding solutions for case 1 and case 2 in the
 * transpose routine.  The code to modify is given below.
 *
 * Think carefully about the code you write because you will use this code
 * in an upcoming laboratory.
 */

// mem-copy.c                                     Warren A. Hunt, Jr.

// NOTE(review): the header names were missing from the handout text; these
// are the headers the code below actually requires.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define VALUE_4096        ( 4096 )
#define LOG_MATRIX_SIZE   ( 15 )
// Computed in long long so the products below cannot overflow an int
// if LOG_MATRIX_SIZE is ever raised.
#define MAX_MATRIX_INDEX  ( 1LL << LOG_MATRIX_SIZE )
#define MEMSIZE_QWORDS    ( MAX_MATRIX_INDEX * MAX_MATRIX_INDEX )
#define MEMSIZE_BYTES     ( 8ULL * MEMSIZE_QWORDS )
#define REPEAT_COUNT      ( 1 )

typedef long long int qword;

// Macro for index calculation: row-major offset of element (i, j) in an
// n-by-n matrix.  Every argument is parenthesized so that expressions such
// as ij( i + 1, j, n ) expand correctly.
#define ij( i, j, n ) ( ( (i) * (n) ) + (j) )

/*
 * Returns i raised to the power e.
 *
 * A negative base yields 1 (behavior kept from the original code).  An
 * exponent <= 0 also yields 1.  The product is accumulated in unsigned
 * arithmetic, so an oversized result wraps instead of being undefined.
 */
long long unsigned int_pow( qword i, qword e )
{
  if ( i < 0 )
    return 1;

  long long unsigned ans = 1;
  while ( e > 0 ) {
    ans *= (long long unsigned) i;
    e--;
  }
  return ans;
}

/*
 * Fills the n-by-n matrix stored row-major in qw_mem so that every element
 * holds its own linear index, i.e. element (i, j) is set to i*n + j.
 */
void init_square_matrix( qword qw_mem[], qword n )
{
  for ( qword i = 0; i < n; i++ )
    for ( qword j = 0; j < n; j++ )
      qw_mem[ ij( i, j, n ) ] = ij( i, j, n );
}

/*
 * Prints every element of the n-by-n matrix in qw_mem, one per line, as
 * its linear index followed by its value.
 */
void print_square_matrix( qword qw_mem[], qword n )
{
  for ( qword i = 0; i < n; i++ )
    for ( qword j = 0; j < n; j++ )
      printf( "qw_mem %3lld is: %3lld.\n",
              ij( i, j, n ), qw_mem[ ij( i, j, n ) ] );
}

/*
 * Transposes, in place, the n-by-n matrix stored row-major in qw_mem.
 *
 * part selects the implementation:
 *   0 - straightforward element-by-element swap across the diagonal,
 *   1 - loop-unrolled transpose   (to be written for this assignment),
 *   2 - block-by-block transpose  (to be written for this assignment).
 *
 * Parts 1 and 2, and any other part value, print a message and terminate
 * the program with EXIT_FAILURE.
 */
void transpose( qword qw_mem[], qword n, int part )
{
  switch ( part ) {
  case 0:  // typical transpose code
    for ( qword r = 0; r < REPEAT_COUNT; r++ )
      for ( qword i = 0; i < n; i++ )
        // Start at j = i+1: each off-diagonal pair is swapped exactly once.
        for ( qword j = i + 1; j < n; j++ ) {
          qword upper = ij( i, j, n );
          qword lower = ij( j, i, n );
          qword tmp = qw_mem[ upper ];
          qw_mem[ upper ] = qw_mem[ lower ];
          qw_mem[ lower ] = tmp;
        }
    break;

  case 1:  // loop unrolled transpose code
    // Solution for Part 1 goes here.
    printf( "Part 1 is not yet implemented.\n" );
    exit( EXIT_FAILURE );
    break;

  case 2:  // block-by-block transpose code
    // Solution for Part 2 goes here.
    printf( "Part 2 is not yet implemented.\n" );
    exit( EXIT_FAILURE );
    break;

  default:
    printf( "Unexpected part.\n" );
    exit( EXIT_FAILURE );
    break;
  }
}

/*
 * Checks that qw_mem holds the transpose of the matrix produced by
 * init_square_matrix: element (i, j) must equal j*n + i.
 *
 * Every element is examined (diagonal and both triangles), so a transpose
 * that damages only part of the matrix is still detected.
 *
 * Returns 1 when the whole matrix is correct, 0 otherwise.
 */
int check_transpose( qword qw_mem[], qword n )
{
  for ( qword i = 0; i < n; i++ )
    for ( qword j = 0; j < n; j++ )
      if ( qw_mem[ ij( i, j, n ) ] != ij( j, i, n ) )
        return 0;

  return 1;
}

/*
 * Parses text as a base-10 integer.  Stores the result in *value and
 * returns 1 only when the entire string is a valid number that fits in a
 * long; otherwise returns 0 and leaves *value untouched.
 */
static int parse_long( const char *text, long *value )
{
  char *endptr;

  errno = 0;
  long parsed = strtol( text, &endptr, 10 );
  if ( endptr == text || *endptr != '\0' || errno == ERANGE )
    return 0;

  *value = parsed;
  return 1;
}

/*
 * Usage: <program> <log_matrix_size> <part>
 *
 * For every r in 0..log_matrix_size, initializes a 2^r-by-2^r matrix,
 * transposes it with the selected part, and reports whether the result is
 * the expected transpose.
 */
int main( int argc, char *argv[] )
{
  if ( argc != 3 ) {
    fprintf( stderr, "Usage: %s <log_matrix_size> <part>\n", argv[0] );
    return EXIT_FAILURE;
  }

  long log_arg;
  if ( !parse_long( argv[1], &log_arg ) ) {
    fprintf( stderr, "Log of matrix dimension \"%s\" is not an integer.\n",
             argv[1] );
    exit( EXIT_FAILURE );
  }
  qword log_matrix_size = log_arg;
  fprintf( stderr, "log_matrix_size = %lld.\n", log_matrix_size );

  if ( log_matrix_size < 1 || log_matrix_size > LOG_MATRIX_SIZE ) {
    fprintf( stderr,
             "Log of matrix dimension %lld is out of range of 1..%d.\n",
             log_matrix_size, LOG_MATRIX_SIZE );
    exit( EXIT_FAILURE );
  }

  long part_arg;
  if ( !parse_long( argv[2], &part_arg ) ) {
    fprintf( stderr, "Illegal Part Number: %s!\n", argv[2] );
    exit( EXIT_FAILURE );
  }
  fprintf( stderr, "Part = %ld.\n", part_arg );

  if ( part_arg < 0 || part_arg > 2 ) {
    fprintf( stderr, "Illegal Part Number: %ld!\n", part_arg );
    exit( EXIT_FAILURE );
  }
  int case_num = (int) part_arg;

  // Allocate memory: only what the largest requested matrix needs, rounded
  // up to a multiple of the alignment as aligned_alloc requires.
  size_t max_dim = (size_t) 1 << log_matrix_size;
  size_t mem_bytes = max_dim * max_dim * sizeof( qword );
  mem_bytes = ( mem_bytes + VALUE_4096 - 1 ) / VALUE_4096 * VALUE_4096;

  qword *mem = aligned_alloc( VALUE_4096, mem_bytes );
  if ( mem == NULL ) {
    perror( "Failed to allocate aligned memory." );
    exit( EXIT_FAILURE );
  }

  printf( "Allocated %llu 4K-aligned bytes at address %p.\n",
          (unsigned long long) mem_bytes, (void *) mem );

  for ( qword r = 0; r <= log_matrix_size; r++ ) {
    qword matrix_size = (qword) int_pow( 2, r );

    init_square_matrix( mem, matrix_size );

    // Start individual TRANSPOSE timing here!

    transpose( mem, matrix_size, case_num );

    // End TRANSPOSE time here!

    // Check that transpose returned an expected answer.
    int matrix_ok = check_transpose( mem, matrix_size );

    printf( "Matrix transpose of size %5lld is (Bad 0, OK 1): %d.\n",
            matrix_size, matrix_ok );
  }

  // Memory obtained from aligned_alloc is released with plain free().
  free( mem );

  printf( "Exit code: %d.\n", EXIT_SUCCESS );
  return EXIT_SUCCESS;
}