Open-MX 0.5.0 (--disable-mx-wire --disable-endian OMX_RCACHE=1 dmaengine=1) Open MPI 1.2.6 PML CM Dual quad-core Xeon (E5345, 2.33GHz, C1E sleeping disabled) Myri-10G NICs (rx-usecs=1, myri10ge 1.4.1, IRQ to core #0), no switch Process on core #2 (just near core #0) #--------------------------------------------------- # Intel (R) MPI Benchmark Suite V2.3, MPI-1 part #--------------------------------------------------- # Date : Tue Apr 29 21:49:24 2008 # Machine : x86_64# System : Linux # Release : 2.6.23-1-amd64 # Version : #1 SMP Fri Nov 30 12:19:15 UTC 2007 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 7.21 0.00 1 1000 7.25 0.13 2 1000 7.24 0.26 4 1000 7.32 0.52 8 1000 7.27 1.05 16 1000 7.28 2.10 32 1000 7.40 4.12 64 1000 7.73 7.90 128 1000 8.00 15.26 256 1000 9.10 26.82 512 1000 10.11 48.31 1024 1000 12.06 80.99 2048 1000 15.87 123.05 4096 1000 23.42 166.77 8192 1000 32.14 243.10 16384 1000 46.80 333.89 32768 1000 69.60 448.97 65536 640 105.58 591.96 131072 320 166.20 752.09 262144 160 283.70 881.21 524288 80 502.25 995.52 1048576 40 939.05 1064.91 2097152 20 1808.50 1105.89 4194304 10 3558.10 1124.20 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 7.65 0.00 1 1000 7.72 0.12 2 1000 7.70 0.25 4 1000 7.80 0.49 8 1000 7.73 0.99 16 1000 7.76 1.97 32 1000 7.90 3.86 64 1000 8.52 7.16 128 1000 8.97 13.60 256 1000 9.63 25.36 512 1000 10.62 45.99 1024 1000 12.42 78.60 2048 1000 16.51 118.30 4096 1000 24.58 158.93 8192 1000 34.43 226.88 16384 1000 51.41 303.91 32768 1000 79.07 395.20 65536 640 122.37 510.74 131072 320 189.73 658.82 262144 160 313.64 797.09 524288 80 656.41 761.71 1048576 40 1175.93 850.39 2097152 20 2168.31 922.38 4194304 10 4364.30 916.53 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 7.88 7.89 7.88 0.00 1 1000 7.90 7.91 7.91 0.24 2 1000 7.89 7.90 7.90 0.48 4 1000 8.00 8.01 8.01 0.95 8 1000 7.99 8.00 8.00 1.91 16 1000 7.97 7.97 7.97 3.83 32 1000 8.12 8.12 8.12 7.51 64 1000 8.71 8.72 8.72 13.99 128 1000 9.21 9.21 9.21 26.50 256 1000 9.73 9.73 9.73 50.17 512 1000 10.69 10.69 10.69 91.33 1024 1000 12.48 12.48 12.48 156.46 2048 1000 16.60 16.61 16.60 235.22 4096 1000 24.68 24.69 24.69 316.44 8192 1000 34.57 34.58 34.58 451.79 16384 1000 51.04 51.06 51.05 612.01 32768 1000 78.55 78.55 78.55 795.66 65536 640 123.00 123.00 123.00 1016.25 131072 320 191.00 191.02 191.01 1308.75 262144 160 321.96 322.03 321.99 1552.65 524288 80 632.28 632.44 632.36 1581.18 1048576 40 1174.07 1174.30 1174.19 1703.14 2097152 20 2192.90 2193.56 2193.23 1823.52 4194304 10 4381.11 4383.02 4382.06 1825.23 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 13.06 13.06 13.06 0.00 1 1000 12.70 12.71 12.70 0.30 2 1000 13.11 13.12 13.11 0.58 4 1000 12.91 12.91 12.91 1.18 8 1000 13.04 13.04 13.04 2.34 16 1000 13.01 13.01 13.01 4.69 32 1000 13.31 13.32 13.32 9.16 64 1000 13.55 13.55 13.55 18.01 128 1000 13.76 13.76 13.76 35.48 256 1000 15.79 15.79 15.79 61.83 512 1000 16.80 16.81 16.80 116.22 1024 1000 18.59 18.59 18.59 210.09 2048 1000 23.55 23.55 23.55 331.70 4096 1000 34.30 34.30 34.30 455.51 8192 1000 51.42 51.44 51.43 607.50 16384 1000 77.29 77.31 77.30 808.49 32768 1000 135.48 135.49 135.49 922.55 65536 640 243.13 243.14 243.13 1028.23 131072 320 377.17 377.20 377.19 1325.55 262144 160 811.05 811.13 811.09 1232.86 524288 80 1468.11 1468.14 1468.13 1362.27 1048576 40 2605.35 2605.58 2605.46 1535.17 2097152 20 4658.06 4658.10 4658.08 1717.44 4194304 10 8551.50 8552.41 8551.96 1870.82 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 8.34 8.35 8.35 8 1000 8.19 8.20 8.19 16 1000 8.19 8.20 8.20 32 1000 8.33 8.33 8.33 64 1000 8.78 8.78 8.78 128 1000 9.45 9.45 9.45 256 1000 10.13 10.13 10.13 512 1000 11.52 11.55 11.54 1024 1000 13.61 13.62 13.61 2048 1000 18.04 18.07 18.05 4096 1000 27.13 27.15 27.14 8192 1000 39.09 39.11 39.10 16384 1000 74.65 74.65 74.65 32768 1000 113.00 113.03 113.02 65536 640 178.69 178.70 178.70 131072 320 442.50 442.53 442.51 262144 160 728.93 729.01 728.97 524288 80 1573.94 1574.05 1573.99 1048576 40 3020.90 3021.00 3020.95 2097152 20 5160.90 5161.15 5161.03 4194304 10 10151.98 10152.79 10152.39 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 7.54 7.56 7.55 8 1000 7.47 7.48 7.48 16 1000 7.47 7.47 7.47 32 1000 7.56 7.56 7.56 64 1000 7.84 7.84 7.84 128 1000 8.16 8.17 8.16 256 1000 9.40 9.41 9.41 512 1000 10.57 10.58 10.57 1024 1000 12.66 12.68 12.67 2048 1000 17.18 17.20 17.19 4096 1000 25.44 25.46 25.45 8192 1000 35.69 35.72 35.70 16384 1000 53.46 53.50 53.48 32768 1000 82.29 82.35 82.32 65536 640 130.52 130.65 130.58 131072 320 258.35 258.41 258.38 262144 160 509.03 509.21 509.12 524288 80 1020.69 1021.05 1020.87 1048576 40 2119.59 2120.10 2119.85 2097152 20 4582.55 4584.50 4583.53 4194304 10 9433.29 9437.51 9435.40 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 1.68 1.69 1.69 8 1000 14.87 14.87 14.87 16 1000 14.79 14.80 14.79 32 1000 14.93 14.93 14.93 64 1000 15.35 15.36 15.35 128 1000 15.91 15.92 15.92 256 1000 17.35 17.36 17.36 512 1000 19.76 19.77 19.76 1024 1000 22.82 22.83 22.83 2048 1000 29.02 29.03 29.03 4096 1000 40.90 40.92 40.91 8192 1000 58.76 58.78 58.77 16384 1000 85.34 85.36 85.35 32768 1000 130.90 130.93 130.92 65536 640 203.53 203.60 203.56 131072 320 374.53 374.56 374.54 262144 160 697.90 697.93 697.92 524288 80 1358.41 1358.43 1358.42 1048576 40 2776.37 2776.75 2776.56 2097152 20 7267.11 7274.56 7270.83 4194304 10 15194.80 15228.89 15211.84 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 8.16 8.16 8.16 2 1000 8.16 8.17 8.16 4 1000 8.23 8.23 8.23 8 1000 8.20 8.20 8.20 16 1000 8.22 8.22 8.22 32 1000 8.36 8.37 8.36 64 1000 8.85 8.85 8.85 128 1000 9.36 9.36 9.36 256 1000 10.09 10.09 10.09 512 1000 11.16 11.17 11.17 1024 1000 13.22 13.23 13.23 2048 1000 17.25 17.28 17.26 4096 1000 25.50 25.52 25.51 8192 1000 36.01 36.01 36.01 16384 1000 54.10 54.12 54.11 32768 1000 83.40 83.43 83.42 65536 640 132.01 132.02 132.01 131072 320 205.46 205.49 205.47 262144 160 349.61 349.63 349.62 524288 80 728.40 728.51 728.46 1048576 40 1587.27 1587.85 1587.56 2097152 20 3102.99 3106.06 3104.53 4194304 10 7047.89 7050.51 7049.20 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 1 1000 16.11 16.11 16.11 2 1000 16.12 16.13 16.13 4 1000 16.16 16.18 16.17 8 1000 16.16 16.16 16.16 16 1000 16.30 16.30 16.30 32 1000 16.65 16.65 16.65 64 1000 17.25 17.26 17.26 128 1000 18.70 18.71 18.71 256 1000 20.87 20.88 20.87 512 1000 23.91 23.93 23.92 1024 1000 29.57 29.59 29.58 2048 1000 41.12 41.14 41.13 4096 1000 58.05 58.08 58.06 8192 1000 81.50 81.53 81.52 16384 1000 119.18 119.23 119.21 32768 1000 177.69 177.70 177.69 65536 640 277.81 277.82 277.81 131072 320 462.44 462.46 462.45 262144 160 812.68 812.68 812.68 524288 80 1497.31 1497.31 1497.31 1048576 40 2884.53 2884.55 2884.54 2097152 20 6126.59 6127.05 6126.82 4194304 10 13114.40 13115.79 13115.10 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 8.20 8.20 8.20 2 1000 8.21 8.21 8.21 4 1000 8.28 8.28 8.28 8 1000 8.19 8.19 8.19 16 1000 8.23 8.23 8.23 32 1000 8.36 8.37 8.36 64 1000 8.95 8.95 8.95 128 1000 9.42 9.43 9.42 256 1000 10.09 10.09 10.09 512 1000 11.22 11.22 11.22 1024 1000 13.11 13.11 13.11 2048 1000 16.97 16.97 16.97 4096 1000 25.85 25.86 25.86 8192 1000 35.99 36.01 36.00 16384 1000 54.29 54.35 54.32 32768 1000 82.62 82.63 82.62 65536 640 133.57 133.57 133.57 131072 320 212.54 212.55 212.54 262144 160 351.64 351.66 351.65 524288 80 717.06 717.19 717.12 1048576 40 1357.10 1357.30 1357.20 2097152 20 3087.40 3089.59 3088.50 4194304 10 7016.71 7017.71 7017.21 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 7.91 7.91 7.91 2 1000 7.74 7.74 7.74 4 1000 7.84 7.85 7.85 8 1000 7.76 7.78 7.77 16 1000 7.76 7.76 7.76 32 1000 7.88 7.88 7.88 64 1000 8.13 8.13 8.13 128 1000 8.39 8.40 8.39 256 1000 9.62 9.62 9.62 512 1000 10.66 10.67 10.67 1024 1000 12.60 12.60 12.60 2048 1000 17.39 17.40 17.39 4096 1000 23.56 23.58 23.57 8192 1000 34.54 34.57 34.56 16384 1000 52.50 52.53 52.51 32768 1000 87.83 87.86 87.85 65536 640 155.05 155.12 155.09 131072 320 280.07 280.08 280.08 262144 160 508.01 508.05 508.03 524288 80 683.78 683.79 683.78 1048576 40 1374.63 1374.98 1374.80 2097152 20 2760.65 2760.76 2760.70 4194304 10 5521.68 5522.39 5522.04 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 8.12 8.12 8.12