ms-20230918-m.c
Note added on 2024.06.20
Self complementarity related codes should be removed from functions MakeCols() and MakeColsLast() in order to optimize the performance for N=6. Complements of any rows never appear as columns in even order magic squares.
- ms-20230918-m.c
/* See https://magicsquare6.net/ . Updated on 2023/09/18. Authored by Hidetoshi Mino hidetoshimino@gmail.com . */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <pthread.h> #include <assert.h> #include <stdatomic.h> #ifndef noMD5 #include <openssl/md5.h> #endif #define MAX( x, y ) ( (x) > (y) ? (x) : (y) ) #ifndef N #define N 6 #endif #ifndef NTH #define NTH 1 #endif #ifndef NSMPL #define NSMPL 1 #endif #define NN (N*N) #define AllBits ( ( 1ULL << NN ) - 1 ) typedef long long unsigned u64; inline int topbit( u64 v ) { v = v & ( ~( v >> 1 ) ); float f32 = (float) v; int pos = *( (int *) &f32 ); pos =( pos >> 23 ) - 0x7f; return pos; /* int pos = 0; int v32 = v >> 32; if( v32 ) { pos = 32; } else { v32 = (int) v; } if( v32 & 0xffff0000UL ) { pos +=16; v32 >>= 16; } if( v32 & 0x0000ff00UL ) { pos += 8; v32 >>= 8; } if( v32 & 0x000000f0UL ) { pos += 4; v32 >>= 4; } if( v32 & 0x0000000cUL ) { pos += 2; v32 >>= 2; } if( v32 & 0000000002UL ) pos++; return pos; */ } u64 reverse64( u64 x ) { u64 y; y = ( ( x & 0x5555555555555555ULL ) << 1 ); x = ( y | ( ( x >> 1 ) & 0x5555555555555555ULL ) ); y = ( ( x & 0x3333333333333333ULL ) << 2 ); x = ( y | ( ( x >> 2 ) & 0x3333333333333333ULL ) ); y = ( ( x & 0x0F0F0F0F0F0F0F0FULL ) << 4 ); x = ( y | ( ( x >> 4 ) & 0x0F0F0F0F0F0F0F0FULL ) ); y = ( ( x & 0x00FF00FF00FF00FFULL ) << 8 ); x = ( y | ( ( x >> 8 ) & 0x00FF00FF00FF00FFULL ) ); y = ( ( x & 0x0000FFFF0000FFFFULL ) << 16 ); x = ( y | ( ( x >> 16 ) & 0x0000FFFF0000FFFFULL ) ); y = ( ( x & 0x00000000FFFFFFFFULL ) << 32 ); x = ( y | ( ( x >> 32 ) & 0x00000000FFFFFFFFULL ) ); return x; } int check_topbit( ) { for( int i = 0; i < NN; i ++ ) { // printf( "%d %09llx %d\n", i, 1ULL << i, topbit( 1ULL << i ) ); if( topbit( 1ULL << i ) != i ) return 1; } for( int i = 1; i < NN; i ++ ) { // printf( "%d %09llx %d\n" // , i, ( 1ULL << i ) - 1, topbit( ( 1ULL << i ) - 1 ) ); if( topbit( ( 1ULL << i ) - 1 ) != i - 1 ) return 1; } fprintf( stderr, "topbit() works correctly.\n" ); return 0; } int factorial( int i ) { if( i < 1 ) return 1; return factorial( i - 1 ) * i; } #define OW 3 // octal bit witdh #define OMASK ( ( 1 << OW ) - 1 ) // octal bit mask int pcode( int index, int n ) { int posv = 076543210; // octal number int onev = 011111111; // octal number int res = 0; n--; for( ; n > 0; n-- ) { int pos = ( index >> ( n * OW ) ) & OMASK ; int shift = pos * OW; pos = ( ( posv >> shift ) & OMASK ); shift += OW; posv -= ( ( onev >> shift ) << shift ); res += pos; res *= n; } return res; } int pinv( int pcode, int n ) { int res = 0; for( int b = 1; b < n; b ++ ) { int p = pcode % ( b + 1 ); int shift = p * OW; int upper = ( ( res >> shift ) << ( shift + OW ) ); res = upper | ( b << shift ) | ( res & ( ( 1 << shift ) - 1 ) ); pcode /= ( b + 1 ); } return res; } int inv_indx ( int index ) { int res = 0; int pos = 0; for( ; index != 0; index >>= OW ) { res |= ( pos << ( ( index & OMASK ) * OW ) ); pos ++; } return res; } void GenPerm( int order, const int mask_w, int* res, int res_size ) { if( order < 2 ) { *res = 0; return; } int sub_size = res_size / order; int sub_res[ sub_size ]; GenPerm( order - 1, mask_w, sub_res, sub_size ); unsigned up_mask = ( 1u << ( order - 1 ) * mask_w ) - 1; unsigned low_mask = up_mask; unsigned insert = ( order - 1 ) << ( order - 1 ) * mask_w; for( int i = 0; i < order; i ++ ) { for( int j = 0; j < sub_size; j ++ ) { int upper = sub_res[ j ] & ( up_mask ^ low_mask ); int lower = sub_res[ j ] & low_mask; *(res++) = ( upper << mask_w ) | insert | lower ; } low_mask >>= mask_w; insert >>= mask_w; } } /* = globals ======================= */ #define MPERM 720 #define MPERMW ( ( MPERM + 31 ) / 32 ) unsigned diag_chk_a[ MPERM ][ MPERMW ]; #define MVECS 32768 int nvecs = 0; u64 vecs[ MVECS ]; #define MROWSET 10000000 // probable max 8390876 u64 row_sets[ MROWSET ][ N - 2 ]; int n_row_sets; u64 count_g; u64 sm_count_g; /*= Thread shared ========================= */ // read/write int _Atomic i_row_set = ATOMIC_VAR_INIT( 0 ); // read only int max_row, second_row; u64 rev_max; // Reverse( vecs[ max_row ] ) u64* rows; int nrows; u64* cols; int ncols; u64* diags; int ndiags; /* = Thread local ========================== */ __thread u64 col_vecs[ N ]; __thread u64 row_vecs[ N ]; __thread int which_row[ 37 ]; // 37 is the common magic number for order <= 6. __thread u64 sub_count; __thread u64 sub_sm_count; // __thread u64* vr; __thread u64* vc; __thread u64* vx; /* ========================================= */ void Count( ); void MakeRows( int level, u64 rem, u64* vrb, u64* vre, u64* vcb, u64* vce, u64* vxb, u64* vxe, int single, u64* countp ); void RowSet( ); void MakeCols( int level, u64 rem, u64* vcb, u64* vce, u64* vxb, u64* vxe, int single ); void MakeColsLast( int level, u64 rem, u64* cols, u64* vxb, u64* vxe, int single ); unsigned MakeDiag( unsigned* diags, int ndiags ); int DiagPos( u64* vxb, u64* vxe, u64 col_vec, unsigned* cxe ); void set_vec( u64 vecs[], int index[], u64 v, int level ); int Cross( int v1, int v2 ) { u64 v = vecs[v1] & vecs[v2]; return v && ( ! ( v & ( v - 1 ) ) ); } u64 Reverse( u64 v ) { return reverse64( v ) >> (64 - NN); } void MakeDiagCheck( int n ) { int perm_size = factorial( n ); int perm[ perm_size ]; static const int mask_w = 3; GenPerm( n, mask_w, perm, perm_size ); unsigned lower_mask = ( 1 << mask_w ) - 1; unsigned pos_mask = ( 1 << ( mask_w * (n-1) ) ) - 1; for( int dx = 0; dx < perm_size; dx ++ ) { unsigned* va = diag_chk_a[ pcode( ( perm[ dx ] & pos_mask ) << 3, N ) ]; for( int j = 0; j < MPERMW; j ++ ) { va[ j ] = 0U; } int c[ n ]; for( int k = 0; k < n; k ++ ) { c[ ( perm[ dx ] >> ( (n-1-k)*mask_w ) ) & lower_mask ] = k; } for( int cx = 0; cx < perm_size; cx ++ ) { int r[ n ]; int ncross = 0; for( int k = 0; k < n; k ++ ) { r[ k ] = ( perm[ cx ] >> ( (n-1-k)*mask_w ) ) & lower_mask; if( c [ r[ k ] ] == k ) ncross ++; } if( ncross == N % 2 && r[ c[ r[ c[ 0 ] ] ] ] == 0 && r[ c[ r[ c[ 1 ] ] ] ] == 1 && r[ c[ r[ c[ 2 ] ] ] ] == 2 ) { int pos_code = pcode( ( perm[ cx ] & pos_mask ) << 3, N ); va[ pos_code >> 5 ] |= 1U << ( pos_code & ( ( 1 << 5 ) - 1 ) ); } } } } void GenVecs( int max_ent, int rem_sum, int level, u64 vp ) { static int elem[ N ]; if ( level >= 0 ) { for ( int ent = max_ent; ent > 0 ; --ent ) { u64 v = 1ULL << ( ent - 1 ); elem[ level ] = v % 37; // A trick to trasform 2^n into [1:36] if ( rem_sum >= ent ) GenVecs( ent - 1, rem_sum - ent, level - 1, vp | v ); } } else { if ( rem_sum == 0 ) { vecs[ nvecs ] = vp; /* u64 ve = 0ULL; for( int i = 0; i < N; i ++ ) { ve <<= 6; // 2^6 >= 37 ve |= elem[ i ]; } vec_elem[ nvecs ] = ve; */ nvecs ++; } } if( nvecs > MVECS ) { fprintf( stderr, "nvecs > MVECS\n" ); exit(1); } } int MakeRowSet( int m, int l, u64 rem, u64* vrb, u64* vre, u64 res[][N-2] ) { static u64 row_vecs[ N ]; static int j = 0; if( l == 0 ) j = 0; u64 must_bit = ( 1ULL << topbit( rem ) ); for( ; vrb < vre; vrb += NSMPL ) { u64 row_vec = *vrb; if( ! ( row_vec & must_bit ) ) break; row_vecs[ l ] = row_vec; if( m > l + 1 ) { u64* vrp = vrb + 1; u64* vrq = vre; while( vrp < vre ) { *vrq = *vrp; vrq += ( row_vec & *vrp ) ? 0 : 1; vrp ++; } MakeRowSet( m, l + 1, rem ^ row_vec, vre, vrq, res ); } else { for( int i = 0; i < m; i ++ ) { res[ j ][ i ] = row_vecs[ i ]; } j ++; assert( j < MROWSET ); } } return j; } void Count( ) { rows = malloc( sizeof( u64 ) * nvecs * ( N - 2 ) ); cols = malloc( sizeof( u64 ) * nvecs ); diags = malloc( sizeof( u64 ) * nvecs ); rev_max = Reverse( vecs[ max_row ] ); u64* rowp = rows; for( int i = second_row + 1; i < nvecs; ++ i ) if( ( ! ( vecs[ max_row ] & vecs[ i ] ) ) && ( ! ( vecs[ second_row ] & vecs[ i ] ) ) && vecs[ max_row ] >= Reverse( vecs[ i ] ) ) { *(rowp++) = vecs[ i ]; } nrows = rowp - rows; // printf(" %d\n", nrows ); u64* colp = cols; for( int i = max_row + 1; i < nvecs; ++ i ) if( Cross( max_row, i ) && Cross( second_row, i ) && vecs[ max_row ] >= Reverse( vecs[ i ] ) ) { *(colp++) = vecs[ i ]; } ncols = colp - cols; u64* diagp = diags; for( int i = 0; i < nvecs; ++ i ) if( Cross( max_row, i ) && Cross( second_row, i ) ) *(diagp++) = vecs[ i ]; ndiags = diagp - diags; u64 rem = AllBits ^ vecs[ max_row ] ^ vecs[ second_row ]; n_row_sets = MakeRowSet( N - 2, 0, rem, rows, rows + nrows, row_sets ); fprintf( stderr, "n_row_sets %d\n", n_row_sets ); i_row_set = 0; u64 count[ NTH ][2]; pthread_t thread[ NTH ]; for( int i = 0; i < NTH; i ++ ) { count[ i ][0] = 0ULL; count[ i ][1] = 0ULL; if (pthread_create( thread + i , NULL, (void*) &RowSet, count + i ) ) { fprintf( stderr, "pthread_create error! \n" ); exit( 1 ); } } for( int i = 0; i < NTH; i ++ ) { if( pthread_join( thread[ i ], NULL ) ) { fprintf( stderr, "pthread_join error! \n" ); exit( 1 ); } count_g += count[ i ][0]; sm_count_g += count[ i ][1]; } free( diags ); free( cols ); free( rows ); return; } void set_vec( u64 vecs[], int index[], u64 v, int level ) { vecs[ level ] = v; for( int j = 0; j < N; ++ j ) { u64 tp = ( 1ULL << topbit( v ) ); index[ tp % 37 ] = level; v ^= tp; } } void RowSet( u64* countp ) { // 37 is the common magic number for order <= 6. // for( int j = 0; j < 37; ++ j ) which_row[ j ] = 0; //magic index range !! set_vec( row_vecs, which_row, vecs[ max_row ], N - 1 ); set_vec( row_vecs, which_row, vecs[ second_row ], N - 2 ); int vcsize = ncols * MAX( N - 1, 2 ); int vxsize = ndiags * MAX( N + 1, 3 ); vc = malloc( sizeof( u64 ) * vcsize ); vx = malloc( sizeof( u64 ) * vxsize ); vc[ vcsize - 1 ] = 0ULL; vx[ vxsize - 1 ] = 0ULL; int single0 = rev_max == vecs[ max_row ] || rev_max == vecs[ second_row ]; int irs; for( ; /* irs < n_row_sets */; /* irs ++ */ ) { irs = ( i_row_set ++ ); if( irs >= n_row_sets ) break; int single = single0; for( int j = 0; j < N - 2; j ++ ) { u64 row_vec = row_sets[ irs ][ j ]; set_vec( row_vecs, which_row, row_vec, j ); single = single || rev_max == row_vec; // printf( "i_rowset %d j %d row_vec %09llx\n", irs, j, row_vec ); } int kvc = 0; for( int i = 0; i < ncols; i ++ ) { int cross = 1; for( int j = 0; j < N - 2; j ++ ) { u64 x = cols[ i ] & row_vecs[ j ]; cross = cross && ( ( x & (x-1) ) == 0 ) && ( x != 0 ); } if ( cross ) vc[ kvc ++ ] = cols[ i ]; } int kvx = 0; for( int i = 0; i < ndiags; i ++ ) { int cross = 1; for( int j = 0; j < N - 2; j ++ ) { u64 x = diags[ i ] & row_vecs[ j ]; cross = cross && ( ( x & (x-1) ) == 0 ) && ( x != 0 ); } if ( cross ) vx[ kvx ++ ] = diags[ i ]; } // printf( "irs %d, %d / %d, %d / %d\n", irs, kvc, ncols, kvx, ndiags ); sub_count = 0ULL; sub_sm_count = 0ULL; MakeCols( N - 1, AllBits, vc, vc + kvc, vx, vx + kvx, single ); countp[0] += sub_count; countp[1] += sub_sm_count; #if N> 5 #if V if( irs % 10000 == 0 ) printf( "irs %8d %10lld %10lld\n", irs, countp[0], countp[1] ); #endif #endif } if ( vc[ vcsize - 1 ] != 0ULL || vx[ vxsize - 1 ] != 0ULL ) { fprintf( stderr, " Array over run ! \n"); exit( 1 ); } free( vx ); free( vc ); } void MakeCols( int level, u64 rem, // 46 / 46 u64* vcb, u64* vce, u64* vxb, u64* vxe, int single ) { u64 must_bit = ( 1ULL << topbit( rem ) ); for( ; vcb < vce - level; vcb ++ ) { u64 col_vec = *vcb; if( ! ( col_vec & must_bit ) ) break; col_vecs[ level ] = col_vec; if( level > 1 ) { u64* vcp = vcb + 1; // 3 / 46 u64* vcq = vce; while( vcp < vce ) { *vcq = *vcp; if ( ! ( col_vec & ( *vcp ) ) ) vcq ++ ; vcp ++; } if( vcq - vce < level ) continue; u64* vxp = vxb; // 10 / 46 u64* vxq = vxe; while( vxp < vxe ) { u64 x = col_vec & ( *vxp ); *vxq = *vxp; vxq += ( ( x & (x-1) ) ? 0 : 1 ) - ( x ? 0 : 1 ); vxp ++; } int ndiags = vxq - vxe; // if ( ndiags < 2 ) continue; if( vcq - vce == level ) { MakeColsLast( level - 1, rem ^ col_vec, vce, vxe, vxq, single || ( rev_max == col_vec ) ); } else { MakeCols( level - 1, rem ^ col_vec, vce, vcq, vxe, vxq, single || ( rev_max == col_vec ) ); } } else { int ndiags = DiagPos( vxb, vxe, col_vec, (unsigned*) vxe ); // if ( ndiags < 2 ) continue; u64 col_vecs0 = rem ^ col_vec; // col_vecs[ 0 ] is not set. if( ( col_vecs0 & 1 ) && row_vecs[ N - 1 ] < Reverse( col_vecs0 ) ) continue; int shift = ( single || rev_max == col_vec || rev_max == col_vecs0 ) ? 0 : 1; sub_count += MakeDiag( (unsigned*) vxe, ndiags ) << shift; sub_sm_count += 1 << shift; } } return; } void MakeColsLast( int level, u64 rem, // 8 / 46 u64* cols, u64* vxb, u64* vxe, int single ) { u64 rem_inv = ~ rem; for( int i = 0; i < level; i ++ ) { u64 col_vec = cols[ i ]; if( rem_inv & col_vec ){ /* printf( "!" ); */ return; } rem_inv ^= col_vec; col_vecs[ level - i ] = col_vec; // col_vecs[0] is not set. single = single || ( rev_max == col_vec ); } if( rem_inv & cols[ level ] ){ return; } int shift = ( single || ( rev_max == cols[ level ] ) ) ? 0 : 1; for( int c = 1; c < level/* + 1*/; c ++ ) { // Don't use col_vecs[0]. u64* vxq = vxb; u64* vxp = vxb; while( vxp < vxe ) { u64 x = ( *vxp ) & col_vecs[ c ]; *vxq = *vxp; vxq += ( ( x & (x-1) ) ? 0 : 1 ) - ( x ? 0 : 1 ); vxp ++; } vxe = vxq; // if( vxe - vxb < 2 ) return; } int ndiags = DiagPos( vxb, vxe, col_vecs[ level ], (unsigned*) vxb ); // if( ndiags < 2 ) return; sub_count += MakeDiag( (unsigned*) vxb, ndiags ) << shift; sub_sm_count += 1 << shift; } int DiagPos( u64* vxb, u64* vxe, u64 col, unsigned* cxb ) { u64* vxp = vxb; unsigned* cxp = cxb; while( vxp < vxe ) { u64 x = col & ( *vxp ); if( ( x & (x-1) ) == 0 && x != 0 ) { unsigned pos_code = 0U; int posv = 076543210; // octal number int onev = 011111111; // octal number for( int c = 1; c < N; ++ c ) { // Don't use col_vecs[ 0 ]. int r = which_row[ ( *vxp & col_vecs[ c ] ) % 37 ]; int shift = r * OW; r = ( ( posv >> shift ) & OMASK ); shift += OW; posv -= ( ( onev >> shift ) << shift ); pos_code = ( pos_code + r ) * ( N - c ); } *(cxp ++) = pos_code; } vxp ++; } return cxp - cxb; } unsigned MakeDiag( unsigned* diag_pos, int ndiags ) { unsigned diag_count = 0; for( int dx = 0; dx < ndiags - 1; ++ dx ) { unsigned* cx_chk = diag_chk_a[ diag_pos[ dx ] ]; for( int cx = dx + 1; cx < ndiags; ++ cx ) { unsigned cx_pos = diag_pos[ cx ]; diag_count += ( cx_chk[ cx_pos >> 5 ] >> ( cx_pos & 0x1f ) ) & 1; } } return diag_count; } #ifndef noMD5 void MakeMD5( u64 max_p, u64 second_p, u64 count, char md5str[] ) { MD5_CTX md5ctx; unsigned char md5bin[ MD5_DIGEST_LENGTH ]; const char* sample = "e00000007 1c0000038 494199069345"; int msg_len = strlen( sample ); char msg[ msg_len + 1 ]; sprintf( msg, "%09llx %09llx %15llu", max_p, second_p, count ); MD5_Init( &md5ctx ); MD5_Update( &md5ctx, msg, msg_len ); MD5_Final( md5bin, &md5ctx ); for( int i = 0; i < MD5_DIGEST_LENGTH; i ++ ) sprintf( md5str + ( i * 2 ), "%02x", md5bin[ i ] ); md5str[ MD5_DIGEST_LENGTH * 2 ] = 0; } #endif int main( int argc, char* argv[] ) { fprintf( stderr, "Size of long long unsigned : %lu \n", sizeof( u64 ) ); if( sizeof( u64 ) * 8 < NN ) { fprintf( stderr, "Size of L L U is too small : %lu \n", sizeof( u64 ) ); exit( 1 ); } if( check_topbit( ) ) { fprintf( stderr, "topbit() does't work correctly on this platform.\n" ); exit( 1 ); } GenVecs( NN, ( ( NN + 1 ) * N ) / 2, N - 1, 0ULL ); fprintf( stderr, "nvecs %d\n", nvecs ); MakeDiagCheck( N ); u64 row1P = strtoll( argv[ 2 ], NULL, 16 ); u64 row2P = strtoll( argv[ 4 ], NULL, 16 ); fprintf( stderr, "r1:%09llx r2:%09llx\n", row1P, row2P ); u64 total_count = 0ULL; u64 total_sm_count = 0ULL; for( max_row = 0; max_row < nvecs; ++ max_row ) { u64 max_rowP = vecs[ max_row ]; if( row1P != 0ULL && row1P < max_rowP ) continue; if( max_rowP < row1P ) break; if( ! ( max_rowP & 1ULL << ( NN - 1 ) ) ) break; if( max_rowP < Reverse( vecs[ max_row ] ) ) continue; for( second_row = max_row + 1; second_row < nvecs; ++ second_row ) { u64 second_rowP = vecs[ second_row ]; if( row2P != 0ULL && row2P < second_rowP ) continue; if( second_rowP < row2P ) break; if( max_rowP & second_rowP ) continue; if( max_rowP < Reverse( vecs[ second_row ] ) ) continue; u64 rem = AllBits ^ max_rowP ^ second_rowP; if( rem > second_rowP ) break; count_g = 0ULL; sm_count_g = 0ULL; Count( ); printf( "%05d %09llx ", max_row, vecs[ max_row ] ); printf( "%05d %09llx ", second_row, vecs[ second_row ] ); printf( "%15llu ", count_g ); #ifndef noMD5 char md5str[ MD5_DIGEST_LENGTH * 2 + 1 ]; MakeMD5( vecs[ max_row ], vecs[ second_row ], count_g, md5str ); printf( "%s", md5str ); #endif printf( "\n"); printf( "%05d %09llx ", max_row, vecs[ max_row ] ); printf( "%05d %09llx ", second_row, vecs[ second_row ] ); printf( "S %13llu ", sm_count_g ); #ifndef noMD5 MakeMD5( vecs[ max_row ], vecs[ second_row ], sm_count_g, md5str ); printf( "%s", md5str ); #endif printf( "\n"); total_count += count_g; total_sm_count += sm_count_g; } } if( row2P == 0ULL ) { fprintf( stderr, "Total: %17llu\n", total_count ); fprintf( stderr, "Total: %17llu\n", total_sm_count ); } return 0; }
ms-20230918-m.c.txt · Last modified: 2024/06/22 11:46 by mino