Open MPI 1.2.6 BTL TCP Dual quad-core Xeon (E5345, 2.33GHz, C1E sleeping disabled) Myri-10G NICs (rx-usecs=1, myri10ge 1.4.1, IRQ to core #0), no switch Process on core #2 (just near core #0) #--------------------------------------------------- # Intel (R) MPI Benchmark Suite V2.3, MPI-1 part #--------------------------------------------------- # Date : Mon May 5 18:08:04 2008 # Machine : x86_64# System : Linux # Release : 2.6.23-1-amd64 # Version : #1 SMP Fri Nov 30 12:19:15 UTC 2007 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 14.54 0.00 1 1000 15.32 0.06 2 1000 15.31 0.12 4 1000 15.41 0.25 8 1000 15.33 0.50 16 1000 15.37 0.99 32 1000 15.39 1.98 64 1000 15.48 3.94 128 1000 15.61 7.82 256 1000 16.14 15.13 512 1000 16.92 28.85 1024 1000 18.51 52.76 2048 1000 21.67 90.14 4096 1000 28.63 136.44 8192 1000 40.73 191.80 16384 1000 55.13 283.41 32768 1000 71.05 439.84 65536 640 128.73 485.51 131072 320 186.02 671.96 262144 160 295.95 844.74 524288 80 554.74 901.32 1048576 40 1167.11 856.82 2097152 20 2380.90 840.02 4194304 10 4842.75 825.98 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 15.22 0.00 1 1000 16.25 0.06 2 1000 16.17 0.12 4 1000 16.20 0.24 8 1000 16.09 0.47 16 1000 16.18 0.94 32 1000 16.16 1.89 64 1000 16.36 3.73 128 1000 16.44 7.43 256 1000 17.13 14.25 512 1000 17.78 27.46 1024 1000 20.03 48.77 2048 1000 23.07 84.65 4096 1000 31.49 124.06 8192 1000 41.39 188.74 16384 1000 66.08 236.44 32768 1000 87.65 356.55 65536 640 159.06 392.94 131072 320 256.31 487.69 262144 160 459.48 544.09 524288 80 863.60 578.97 1048576 40 1823.53 548.39 2097152 20 3859.85 518.15 4194304 10 7991.10 500.56 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 15.48 15.50 15.49 0.00 1 1000 16.28 16.28 16.28 0.12 2 1000 16.22 16.22 16.22 0.24 4 1000 16.36 16.37 16.37 0.47 8 1000 16.18 16.19 16.19 0.94 16 1000 16.27 16.28 16.27 1.88 32 1000 16.36 16.37 16.36 3.73 64 1000 16.48 16.50 16.49 7.40 128 1000 16.58 16.58 16.58 14.73 256 1000 17.01 17.01 17.01 28.70 512 1000 18.21 18.21 18.21 53.62 1024 1000 20.28 20.29 20.29 96.25 2048 1000 27.13 27.15 27.14 143.85 4096 1000 31.78 31.81 31.79 245.63 8192 1000 41.94 41.94 41.94 372.55 16384 1000 62.88 62.88 62.88 496.96 32768 1000 86.63 86.64 86.64 721.34 65536 640 222.89 222.98 222.93 560.60 131072 320 344.99 345.20 345.09 724.23 262144 160 581.38 582.13 581.75 858.91 524288 80 1101.74 1102.80 1102.27 906.78 1048576 40 2226.15 2228.57 2227.36 897.44 2097152 20 4648.60 4653.39 4651.00 859.59 4194304 10 9297.80 9311.10 9304.45 859.19 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 23.80 23.82 23.81 0.00 1 1000 24.62 24.64 24.63 0.15 2 1000 24.86 24.88 24.87 0.31 4 1000 24.96 24.97 24.96 0.61 8 1000 24.70 24.70 24.70 1.24 16 1000 24.69 24.69 24.69 2.47 32 1000 24.70 24.72 24.71 4.94 64 1000 24.52 24.52 24.52 9.96 128 1000 24.57 24.57 24.57 19.87 256 1000 25.60 25.62 25.61 38.12 512 1000 24.31 24.32 24.32 80.30 1024 1000 25.95 25.96 25.96 150.46 2048 1000 30.26 30.27 30.26 258.10 4096 1000 41.67 41.69 41.68 374.78 8192 1000 61.73 61.77 61.75 505.95 16384 1000 99.05 99.06 99.05 630.96 32768 1000 138.31 138.32 138.31 903.73 65536 640 314.64 314.70 314.67 794.42 131072 320 549.97 550.19 550.08 908.78 262144 160 1002.06 1002.69 1002.37 997.32 524288 80 2064.93 2066.22 2065.58 967.95 1048576 40 4199.80 4202.45 4201.12 951.83 2097152 20 8820.95 8824.80 8822.88 906.54 4194304 10 17461.80 17475.30 17468.55 915.58 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.02 4 1000 16.71 16.72 16.72 8 1000 16.53 16.55 16.54 16 1000 16.41 16.41 16.41 32 1000 16.52 16.53 16.52 64 1000 16.73 16.74 16.73 128 1000 17.19 17.19 17.19 256 1000 17.57 17.57 17.57 512 1000 18.63 18.64 18.64 1024 1000 20.36 20.38 20.37 2048 1000 24.33 24.33 24.33 4096 1000 32.01 32.02 32.01 8192 1000 45.43 45.43 45.43 16384 1000 86.78 86.78 86.78 32768 1000 141.74 141.79 141.77 65536 640 187.53 187.57 187.55 131072 320 483.23 483.40 483.31 262144 160 578.09 578.34 578.21 524288 80 1031.47 1032.31 1031.89 1048576 40 2218.75 2222.00 2220.38 2097152 20 5188.41 5196.60 5192.50 4194304 10 12078.09 12090.40 12084.25 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 15.69 15.71 15.70 8 1000 15.57 15.59 15.58 16 1000 15.65 15.67 15.66 32 1000 15.74 15.75 15.74 64 1000 15.82 15.83 15.83 128 1000 16.06 16.08 16.07 256 1000 16.56 16.58 16.57 512 1000 17.45 17.47 17.46 1024 1000 19.30 19.32 19.31 2048 1000 23.38 23.41 23.39 4096 1000 31.24 31.27 31.26 8192 1000 44.96 45.01 44.99 16384 1000 62.43 62.49 62.46 32768 1000 84.18 84.26 84.22 65536 640 129.28 129.43 129.35 131072 320 269.61 269.88 269.75 262144 160 505.09 505.58 505.33 524288 80 987.18 988.25 987.71 1048576 40 2026.12 2028.33 2027.23 2097152 20 4260.80 4264.14 4262.47 4194304 10 8724.69 8735.80 8730.24 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 2.14 3.67 2.91 8 1000 31.18 31.20 31.19 16 1000 31.14 31.15 31.15 32 1000 31.27 31.29 31.28 64 1000 31.38 31.39 31.38 128 1000 31.68 31.70 31.69 256 1000 32.39 32.41 32.40 512 1000 33.72 33.73 33.72 1024 1000 36.37 36.39 36.38 2048 1000 41.67 41.69 41.68 4096 1000 52.48 52.49 52.48 8192 1000 73.24 73.27 73.25 16384 1000 103.39 103.43 103.41 32768 1000 141.17 141.22 141.19 65536 640 204.54 204.62 204.58 131072 320 406.72 406.93 406.83 262144 160 713.51 714.02 713.77 524288 80 1332.07 1333.91 1332.99 1048576 40 2667.18 2671.13 2669.15 2097152 20 6196.00 6198.55 6197.27 4194304 10 12972.00 12985.90 12978.95 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 16.13 16.13 16.13 2 1000 16.08 16.09 16.08 4 1000 16.41 16.43 16.42 8 1000 16.10 16.12 16.11 16 1000 16.15 16.15 16.15 32 1000 16.33 16.34 16.33 64 1000 16.51 16.51 16.51 128 1000 16.75 16.76 16.75 256 1000 17.17 17.17 17.17 512 1000 18.19 18.19 18.19 1024 1000 19.96 19.96 19.96 2048 1000 25.62 25.62 25.62 4096 1000 31.60 31.62 31.61 8192 1000 43.50 43.51 43.51 16384 1000 70.02 70.05 70.03 32768 1000 92.39 92.41 92.40 65536 640 232.04 232.13 232.08 131072 320 361.79 362.02 361.90 262144 160 643.50 644.27 643.89 524288 80 995.10 995.53 995.31 1048576 40 2197.45 2198.23 2197.84 2097152 20 5201.45 5202.51 5201.98 4194304 10 10846.69 10853.89 10850.29 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 1 1000 32.75 32.77 32.76 2 1000 32.89 32.90 32.89 4 1000 32.84 32.86 32.85 8 1000 32.83 32.85 32.84 16 1000 32.92 32.94 32.93 32 1000 33.25 33.26 33.25 64 1000 33.34 33.35 33.35 128 1000 33.95 33.97 33.96 256 1000 35.19 35.21 35.20 512 1000 37.56 37.58 37.57 1024 1000 42.47 42.49 42.48 2048 1000 52.19 52.22 52.21 4096 1000 71.29 71.33 71.31 8192 1000 98.27 98.33 98.30 16384 1000 128.43 128.48 128.46 32768 1000 202.13 202.22 202.17 65536 640 316.28 316.37 316.33 131072 320 482.11 482.51 482.31 262144 160 816.86 817.63 817.24 524288 80 1583.91 1586.30 1585.11 1048576 40 3502.63 3506.25 3504.44 2097152 20 7826.15 7832.71 7829.43 4194304 10 16361.09 16375.30 16368.20 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 16.30 16.30 16.30 2 1000 16.19 16.20 16.19 4 1000 16.25 16.26 16.26 8 1000 16.30 16.31 16.31 16 1000 16.30 16.31 16.30 32 1000 16.45 16.46 16.45 64 1000 16.50 16.50 16.50 128 1000 16.67 16.67 16.67 256 1000 17.15 17.15 17.15 512 1000 17.94 17.94 17.94 1024 1000 20.45 20.45 20.45 2048 1000 25.45 25.47 25.46 4096 1000 32.37 32.38 32.37 8192 1000 42.77 42.77 42.77 16384 1000 67.79 67.80 67.80 32768 1000 92.03 92.04 92.03 65536 640 231.75 231.82 231.79 131072 320 363.31 363.53 363.42 262144 160 640.71 641.33 641.02 524288 80 1004.33 1004.39 1004.36 1048576 40 2362.80 2364.28 2363.54 2097152 20 5297.24 5297.70 5297.47 4194304 10 10862.68 10874.20 10868.44 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.02 1 1000 15.78 15.81 15.79 2 1000 15.72 15.73 15.72 4 1000 15.89 15.91 15.90 8 1000 15.75 15.77 15.76 16 1000 15.82 15.84 15.83 32 1000 15.83 15.85 15.84 64 1000 16.01 16.03 16.02 128 1000 16.10 16.12 16.11 256 1000 16.54 16.55 16.54 512 1000 17.34 17.36 17.35 1024 1000 19.01 19.03 19.02 2048 1000 24.31 24.34 24.33 4096 1000 30.27 30.30 30.29 8192 1000 41.67 41.71 41.69 16384 1000 64.93 64.97 64.95 32768 1000 110.69 110.73 110.71 65536 640 188.82 188.92 188.87 131072 320 382.66 382.86 382.76 262144 160 687.27 687.78 687.52 524288 80 737.23 738.24 737.73 1048576 40 1527.98 1529.65 1528.81 2097152 20 3234.40 3237.20 3235.80 4194304 10 6652.40 6658.29 6655.35 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 15.33 15.33 15.33