Open-MX 0.5.0 (--disable-mx-wire --disable-endian OMX_RCACHE=1 dmaengine=1) MPICH-MX 1.2.7..5 Dual quad-core Xeon (E5345, 2.33GHz, C1E sleeping disabled) Myri-10G NICs (rx-usecs=1, myri10ge 1.4.1, IRQ to core #0), no switch Process on core #2 (just near core #0) #--------------------------------------------------- # Intel (R) MPI Benchmark Suite V2.3, MPI-1 part #--------------------------------------------------- # Date : Tue Apr 29 20:58:35 2008 # Machine : x86_64# System : Linux # Release : 2.6.23-1-amd64 # Version : #1 SMP Fri Nov 30 12:19:15 UTC 2007 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 7.02 0.00 1 1000 7.00 0.14 2 1000 7.00 0.27 4 1000 7.11 0.54 8 1000 7.05 1.08 16 1000 7.06 2.16 32 1000 7.18 4.25 64 1000 7.55 8.08 128 1000 7.80 15.65 256 1000 8.85 27.58 512 1000 9.85 49.58 1024 1000 11.73 83.22 2048 1000 15.52 125.85 4096 1000 22.97 170.03 8192 1000 31.47 248.25 16384 1000 46.15 338.59 32768 1000 69.14 451.96 65536 640 103.08 606.32 131072 320 162.09 771.17 262144 160 276.81 903.16 524288 80 489.91 1020.59 1048576 40 909.51 1099.49 2097152 20 1754.83 1139.71 4194304 10 3446.50 1160.60 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 7.51 0.00 1 1000 7.56 0.13 2 1000 7.54 0.25 4 1000 7.65 0.50 8 1000 7.64 1.00 16 1000 7.64 2.00 32 1000 7.75 3.94 64 1000 8.46 7.21 128 1000 8.77 13.92 256 1000 9.28 26.30 512 1000 10.30 47.41 1024 1000 12.18 80.17 2048 1000 16.17 120.77 4096 1000 24.12 161.92 8192 1000 33.82 231.00 16384 1000 50.95 306.64 32768 1000 77.84 401.45 65536 640 117.12 533.62 131072 320 178.18 701.53 262144 160 295.49 846.06 524288 80 535.68 933.40 1048576 40 1020.25 980.16 2097152 20 1977.61 1011.32 4194304 10 3896.62 1026.53 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 7.62 7.63 7.62 0.00 1 1000 7.70 7.71 7.70 0.25 2 1000 7.65 7.65 7.65 0.50 4 1000 7.80 7.80 7.80 0.98 8 1000 7.78 7.78 7.78 1.96 16 1000 7.73 7.74 7.74 3.94 32 1000 7.90 7.91 7.90 7.72 64 1000 8.43 8.44 8.44 14.46 128 1000 8.87 8.88 8.88 27.49 256 1000 9.46 9.46 9.46 51.60 512 1000 10.42 10.43 10.42 93.66 1024 1000 12.37 12.37 12.37 157.92 2048 1000 16.36 16.36 16.36 238.75 4096 1000 24.13 24.15 24.14 323.50 8192 1000 33.75 33.76 33.75 462.87 16384 1000 50.47 50.49 50.48 618.90 32768 1000 77.61 77.62 77.61 805.22 65536 640 117.80 117.81 117.81 1061.01 131072 320 178.79 178.80 178.79 1398.21 262144 160 297.06 297.07 297.07 1683.12 524288 80 541.40 541.55 541.48 1846.55 1048576 40 1035.58 1035.88 1035.73 1930.73 2097152 20 1978.70 1979.35 1979.02 2020.86 4194304 10 3864.69 3865.19 3864.94 2069.75 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 12.29 12.31 12.30 0.00 1 1000 12.61 12.62 12.61 0.30 2 1000 12.63 12.65 12.64 0.60 4 1000 12.74 12.75 12.74 1.20 8 1000 12.64 12.65 12.64 2.41 16 1000 12.75 12.75 12.75 4.79 32 1000 12.93 12.95 12.94 9.43 64 1000 13.41 13.42 13.41 18.20 128 1000 13.41 13.41 13.41 36.41 256 1000 15.43 15.44 15.43 63.26 512 1000 16.46 16.47 16.47 118.55 1024 1000 18.26 18.27 18.27 213.80 2048 1000 22.96 22.96 22.96 340.22 4096 1000 34.13 34.13 34.13 457.76 8192 1000 50.45 50.47 50.46 619.23 16384 1000 78.06 78.06 78.06 800.62 32768 1000 135.03 135.05 135.04 925.61 65536 640 227.29 227.30 227.30 1099.88 131072 320 363.78 363.79 363.79 1374.43 262144 160 681.58 681.61 681.59 1467.12 524288 80 1155.11 1155.29 1155.20 1731.17 1048576 40 2168.03 2168.08 2168.05 1844.95 2097152 20 4172.35 4172.50 4172.43 1917.31 4194304 10 8036.59 8037.90 8037.25 1990.57 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.03 0.03 0.03 4 1000 8.65 8.67 8.66 8 1000 8.56 8.56 8.56 16 1000 8.55 8.55 8.55 32 1000 8.73 8.73 8.73 64 1000 9.36 9.36 9.36 128 1000 9.86 9.87 9.87 256 1000 10.70 10.70 10.70 512 1000 11.79 11.80 11.79 1024 1000 13.79 13.79 13.79 2048 1000 18.13 18.13 18.13 4096 1000 34.54 34.55 34.54 8192 1000 52.89 52.92 52.91 16384 1000 74.80 74.81 74.80 32768 1000 111.91 111.93 111.92 65536 640 179.70 179.72 179.71 131072 320 442.22 442.25 442.24 262144 160 755.81 755.88 755.85 524288 80 1366.95 1366.96 1366.96 1048576 40 3004.55 3004.57 3004.56 2097152 20 5084.35 5085.00 5084.68 4194304 10 8542.30 8543.59 8542.94 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.03 0.03 0.03 4 1000 8.20 8.21 8.20 8 1000 8.12 8.13 8.12 16 1000 8.11 8.12 8.12 32 1000 8.26 8.28 8.27 64 1000 8.74 8.75 8.74 128 1000 8.96 8.97 8.96 256 1000 10.12 10.14 10.13 512 1000 11.23 11.24 11.23 1024 1000 13.53 13.55 13.54 2048 1000 17.30 17.32 17.31 4096 1000 24.31 24.33 24.32 8192 1000 35.71 35.74 35.72 16384 1000 54.85 54.88 54.87 32768 1000 87.45 87.49 87.47 65536 640 144.30 144.44 144.37 131072 320 476.46 476.47 476.46 262144 160 864.91 864.92 864.92 524288 80 1576.56 1576.58 1576.57 1048576 40 2535.60 2535.60 2535.60 2097152 20 4267.80 4267.85 4267.82 4194304 10 9984.18 10020.69 10002.43 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 2.66 2.70 2.68 8 1000 9.51 9.51 9.51 16 1000 9.34 9.35 9.34 32 1000 9.28 9.28 9.28 64 1000 9.69 9.69 9.69 128 1000 10.12 10.13 10.12 256 1000 10.84 10.85 10.85 512 1000 11.76 11.77 11.76 1024 1000 13.22 13.22 13.22 2048 1000 15.25 15.26 15.26 4096 1000 19.71 19.71 19.71 8192 1000 29.63 29.64 29.64 16384 1000 43.15 43.19 43.17 32768 1000 70.27 70.29 70.28 65536 640 105.71 105.75 105.73 131072 320 183.40 183.41 183.40 262144 160 412.41 412.44 412.42 524288 80 663.54 663.80 663.67 1048576 40 1228.23 1229.02 1228.62 2097152 20 2129.50 2132.95 2131.22 4194304 10 4630.49 4663.68 4647.09 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.03 0.03 0.03 1 1000 8.36 8.37 8.37 2 1000 8.39 8.40 8.40 4 1000 8.44 8.46 8.45 8 1000 8.43 8.43 8.43 16 1000 8.43 8.44 8.44 32 1000 8.57 8.57 8.57 64 1000 8.99 9.00 8.99 128 1000 9.59 9.60 9.59 256 1000 10.35 10.36 10.36 512 1000 11.51 11.51 11.51 1024 1000 13.20 13.21 13.21 2048 1000 17.06 17.08 17.07 4096 1000 25.79 25.80 25.80 8192 1000 36.31 36.31 36.31 16384 1000 54.55 54.58 54.57 32768 1000 83.27 83.29 83.28 65536 640 126.86 126.88 126.87 131072 320 195.96 196.00 195.98 262144 160 332.64 332.67 332.66 524288 80 676.87 676.91 676.89 1048576 40 1451.05 1451.18 1451.11 2097152 20 3077.30 3077.30 3077.30 4194304 10 6535.98 6537.60 6536.79 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.04 0.04 0.04 1 1000 9.78 9.79 9.79 2 1000 9.72 9.73 9.72 4 1000 9.86 9.86 9.86 8 1000 9.69 9.69 9.69 16 1000 9.72 9.73 9.72 32 1000 9.89 9.89 9.89 64 1000 10.33 10.33 10.33 128 1000 10.92 10.92 10.92 256 1000 12.03 12.03 12.03 512 1000 12.98 12.98 12.98 1024 1000 14.86 14.86 14.86 2048 1000 18.90 18.90 18.90 4096 1000 27.59 27.60 27.60 8192 1000 39.18 39.19 39.18 16384 1000 61.41 61.43 61.42 32768 1000 94.43 94.48 94.45 65536 640 224.00 224.02 224.01 131072 320 524.45 524.46 524.46 262144 160 336.11 336.17 336.14 524288 80 626.27 626.41 626.34 1048576 40 1433.72 1433.75 1433.74 2097152 20 3093.10 3093.19 3093.15 4194304 10 6544.90 6545.31 6545.10 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.03 0.03 0.03 1 1000 8.40 8.41 8.41 2 1000 8.40 8.40 8.40 4 1000 8.47 8.48 8.47 8 1000 8.43 8.45 8.44 16 1000 8.41 8.42 8.42 32 1000 8.55 8.56 8.56 64 1000 9.05 9.05 9.05 128 1000 9.62 9.62 9.62 256 1000 10.47 10.48 10.48 512 1000 11.46 11.47 11.46 1024 1000 13.27 13.28 13.28 2048 1000 17.26 17.28 17.27 4096 1000 26.03 26.06 26.04 8192 1000 36.59 36.60 36.59 16384 1000 54.32 54.35 54.33 32768 1000 82.78 82.78 82.78 65536 640 128.07 128.09 128.08 131072 320 198.60 198.61 198.60 262144 160 332.02 332.06 332.04 524288 80 618.55 618.69 618.62 1048576 40 1564.10 1564.12 1564.11 2097152 20 3080.65 3080.70 3080.68 4194304 10 6328.58 6328.99 6328.79 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 1 1000 7.26 7.27 7.27 2 1000 7.25 7.25 7.25 4 1000 7.32 7.32 7.32 8 1000 7.30 7.30 7.30 16 1000 7.29 7.29 7.29 32 1000 7.40 7.41 7.40 64 1000 7.72 7.72 7.72 128 1000 7.96 7.96 7.96 256 1000 9.09 9.09 9.09 512 1000 10.11 10.11 10.11 1024 1000 12.04 12.04 12.04 2048 1000 15.87 15.88 15.88 4096 1000 23.35 23.37 23.36 8192 1000 32.01 32.03 32.02 16384 1000 46.22 46.24 46.23 32768 1000 69.71 69.75 69.73 65536 640 103.65 103.67 103.66 131072 320 162.51 162.55 162.53 262144 160 277.96 278.22 278.09 524288 80 490.52 490.69 490.60 1048576 40 908.77 909.07 908.92 2097152 20 1755.00 1755.65 1755.33 4194304 10 3449.92 3451.30 3450.61 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 7.71 7.71 7.71