Open-MX 1.0.1 (--disable-endian OMX_RCACHE=1 dmaengine=1) Open MPI 1.2.6 PML CM Single quad-core Xeon (X5460, 3.16GHz, C1E sleeping disabled) Broadcom bnx2 NICs (rx-usecs=1), no switch #--------------------------------------------------- # Intel (R) MPI Benchmark Suite V2.3, MPI-1 part #--------------------------------------------------- # Date : Thu Jan 22 10:20:23 2009 # Machine : x86_64# System : Linux # Release : 2.6.26-1-amd64 # Version : #1 SMP Mon Dec 15 17:25:36 UTC 2008 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 17.89 0.00 1 1000 18.09 0.05 2 1000 18.03 0.11 4 1000 18.08 0.21 8 1000 18.09 0.42 16 1000 18.11 0.84 32 1000 18.36 1.66 64 1000 19.03 3.21 128 1000 22.24 5.49 256 1000 24.62 9.92 512 1000 28.38 17.20 1024 1000 35.13 27.80 2048 1000 48.56 40.22 4096 1000 74.76 52.25 8192 1000 107.38 72.75 16384 1000 174.08 89.76 32768 1000 307.31 101.69 65536 640 615.90 101.48 131072 320 1148.05 108.88 262144 160 2217.88 112.72 524288 80 4343.80 115.11 1048576 40 8594.78 116.35 2097152 20 17098.27 116.97 4194304 10 34143.56 117.15 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 20.35 0.00 1 1000 21.46 0.04 2 1000 21.75 0.09 4 1000 20.92 0.18 8 1000 21.58 0.35 16 1000 20.69 0.74 32 1000 23.27 1.31 64 1000 23.66 2.58 128 1000 30.39 4.02 256 1000 36.42 6.70 512 1000 40.99 11.91 1024 1000 37.48 26.05 2048 1000 50.37 38.78 4096 1000 75.25 51.91 8192 1000 107.64 72.58 16384 1000 174.03 89.78 32768 1000 308.71 101.23 65536 640 766.94 81.49 131072 320 1282.89 97.44 262144 160 2377.60 105.15 524288 80 4569.93 109.41 1048576 40 9043.35 110.58 2097152 20 17886.95 111.81 4194304 10 35126.90 113.87 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 20.17 20.20 20.18 0.00 1 1000 20.96 21.00 20.98 0.09 2 1000 20.84 20.85 20.85 0.18 4 1000 20.85 20.86 20.86 0.37 8 1000 22.25 22.28 22.27 0.68 16 1000 20.93 20.97 20.95 1.46 32 1000 21.77 21.81 21.79 2.80 64 1000 22.19 22.19 22.19 5.50 128 1000 27.72 27.77 27.75 8.79 256 1000 35.37 35.42 35.39 13.79 512 1000 38.41 38.42 38.42 25.42 1024 1000 36.28 36.30 36.29 53.81 2048 1000 50.81 50.82 50.82 76.86 4096 1000 75.73 75.76 75.75 103.12 8192 1000 107.89 107.90 107.89 144.81 16384 1000 174.27 174.29 174.28 179.30 32768 1000 307.80 307.82 307.81 203.04 65536 640 950.99 951.01 951.00 131.44 131072 320 1594.74 1594.83 1594.79 156.76 262144 160 2729.26 2729.36 2729.31 183.19 524288 80 4952.95 4952.95 4952.95 201.90 1048576 40 9506.75 9507.80 9507.27 210.35 2097152 20 17881.85 17883.05 17882.45 223.68 4194304 10 35812.69 35813.12 35812.90 223.38 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 25.36 25.39 25.38 0.00 1 1000 23.62 23.64 23.63 0.08 2 1000 23.50 23.55 23.52 0.16 4 1000 23.48 23.54 23.51 0.32 8 1000 23.57 23.61 23.58 0.65 16 1000 23.93 23.95 23.94 1.27 32 1000 25.16 25.19 25.17 2.42 64 1000 27.21 27.25 27.23 4.48 128 1000 32.01 32.04 32.03 7.62 256 1000 41.33 41.38 41.35 11.80 512 1000 45.88 45.98 45.94 21.24 1024 1000 40.26 40.31 40.29 48.45 2048 1000 51.28 51.39 51.33 76.01 4096 1000 76.71 76.76 76.73 101.78 8192 1000 133.71 133.79 133.75 116.78 16384 1000 267.02 267.16 267.10 116.97 32768 1000 533.64 533.92 533.78 117.06 65536 640 1378.13 1379.43 1378.87 90.62 131072 320 2784.62 2784.84 2784.73 89.77 262144 160 4777.14 4777.41 4777.34 104.66 524288 80 9257.80 9258.39 9257.96 108.01 1048576 40 17646.55 17647.90 17647.29 113.33 2097152 20 35316.19 35318.90 35317.13 113.25 4194304 10 69508.79 69631.60 69597.73 114.89 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 25.23 25.26 25.25 0.00 1 1000 23.33 23.41 23.36 0.08 2 1000 27.07 27.14 27.11 0.14 4 1000 26.03 26.10 26.06 0.29 8 1000 23.09 23.18 23.14 0.66 16 1000 24.82 24.92 24.88 1.22 32 1000 27.93 27.99 27.97 2.18 64 1000 26.17 26.25 26.20 4.65 128 1000 29.29 29.36 29.33 8.32 256 1000 35.87 35.93 35.90 13.59 512 1000 37.09 37.22 37.15 26.24 1024 1000 43.13 43.27 43.19 45.14 2048 1000 68.52 68.60 68.56 56.94 4096 1000 134.00 134.12 134.06 58.25 8192 1000 267.29 267.49 267.39 58.41 16384 1000 533.77 534.30 534.04 58.49 32768 1000 1066.88 1067.81 1067.34 58.53 65536 640 2616.93 2619.57 2618.75 47.72 131072 320 4872.65 4872.86 4872.76 51.30 262144 160 9168.42 9168.67 9168.55 54.53 524288 80 17752.59 17753.20 17752.92 56.33 1048576 40 35091.75 35094.02 35092.68 56.99 2097152 20 68762.55 68764.65 68763.62 58.17 4194304 10 137369.70 137381.01 137374.97 58.23 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 21.34 21.37 21.35 0.00 1 1000 22.81 22.82 22.81 0.17 2 1000 21.72 21.76 21.74 0.35 4 1000 21.51 21.54 21.52 0.71 8 1000 23.00 23.01 23.00 1.33 16 1000 22.72 22.76 22.74 2.68 32 1000 21.73 21.77 21.75 5.61 64 1000 25.15 25.19 25.17 9.69 128 1000 36.72 36.75 36.73 13.29 256 1000 53.14 53.19 53.17 18.36 512 1000 81.53 81.58 81.55 23.94 1024 1000 45.41 45.43 45.42 85.98 2048 1000 65.82 65.83 65.83 118.67 4096 1000 107.77 107.78 107.78 144.97 8192 1000 174.00 174.01 174.00 179.59 16384 1000 307.50 307.51 307.51 203.24 32768 1000 574.14 574.19 574.17 217.70 65536 640 1300.21 1300.25 1300.23 192.27 131072 320 2501.35 2501.36 2501.35 199.89 262144 160 4674.28 4674.50 4674.39 213.93 524288 80 9105.32 9105.45 9105.39 219.65 1048576 40 17915.67 17915.68 17915.68 223.27 2097152 20 35607.15 35608.15 35607.65 224.67 4194304 10 70630.19 70631.00 70630.60 226.53 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 35.16 35.17 35.16 0.00 1 1000 38.17 38.18 38.18 0.10 2 1000 43.02 43.06 43.04 0.18 4 1000 46.44 46.44 46.44 0.33 8 1000 41.93 41.95 41.94 0.73 16 1000 44.42 44.47 44.44 1.37 32 1000 40.72 40.77 40.75 2.99 64 1000 52.03 52.08 52.06 4.69 128 1000 72.40 72.43 72.42 6.74 256 1000 70.09 70.10 70.09 13.93 512 1000 61.88 61.93 61.90 31.54 1024 1000 56.94 56.95 56.94 68.59 2048 1000 84.06 84.11 84.08 92.89 4096 1000 142.43 142.49 142.46 109.66 8192 1000 266.72 266.88 266.80 117.09 16384 1000 533.38 533.63 533.50 117.12 32768 1000 1066.77 1067.09 1066.93 117.14 65536 640 2401.28 2402.69 2402.09 104.05 131072 320 4699.56 4699.68 4699.62 106.39 262144 160 9081.98 9082.26 9082.16 110.10 524288 80 17715.50 17716.59 17716.03 112.89 1048576 40 34862.15 34862.35 34862.26 114.74 2097152 20 69344.75 69348.11 69346.37 115.36 4194304 10 138366.79 138370.11 138368.00 115.63 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 29.73 29.75 29.74 0.00 1 1000 34.78 34.85 34.82 0.11 2 1000 33.38 33.43 33.39 0.23 4 1000 34.11 34.14 34.13 0.45 8 1000 33.69 33.73 33.71 0.90 16 1000 32.57 32.63 32.60 1.87 32 1000 34.70 34.73 34.71 3.51 64 1000 35.89 35.91 35.90 6.80 128 1000 44.96 44.99 44.98 10.85 256 1000 51.94 51.97 51.95 18.79 512 1000 53.56 53.59 53.58 36.44 1024 1000 74.31 74.36 74.34 52.53 2048 1000 135.63 135.76 135.70 57.55 4096 1000 266.62 266.85 266.74 58.55 8192 1000 533.23 533.57 533.42 58.57 16384 1000 1066.36 1067.11 1066.77 58.57 32768 1000 2132.74 2134.07 2133.47 58.57 65536 640 4492.60 4494.33 4493.59 55.63 131072 320 9647.10 9647.27 9647.19 51.83 262144 160 18281.62 18281.79 18281.69 54.70 524288 80 35311.08 35324.64 35322.75 56.62 1048576 40 69447.90 69449.85 69448.82 57.60 2097152 20 137728.36 137731.95 137729.97 58.08 4194304 10 274143.51 274146.70 274144.75 58.36 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 21.55 21.55 21.55 8 1000 21.33 21.34 21.34 16 1000 21.76 21.80 21.78 32 1000 22.19 22.24 22.22 64 1000 23.23 23.27 23.25 128 1000 31.90 31.95 31.93 256 1000 37.53 37.54 37.53 512 1000 41.82 41.84 41.83 1024 1000 37.15 37.23 37.19 2048 1000 51.49 51.52 51.50 4096 1000 76.95 76.98 76.96 8192 1000 110.74 110.76 110.75 16384 1000 220.71 220.73 220.72 32768 1000 358.39 358.41 358.40 65536 640 631.37 631.44 631.41 131072 320 1992.75 1992.80 1992.77 262144 160 2753.47 2753.69 2753.58 524288 80 5161.77 5161.95 5161.86 1048576 40 10308.68 10309.90 10309.29 2097152 20 21192.35 21194.15 21193.25 4194304 10 41350.58 41353.89 41352.24 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 28.00 28.05 28.03 8 1000 28.11 28.12 28.12 16 1000 26.98 27.01 26.99 32 1000 26.28 26.32 26.30 64 1000 31.74 31.76 31.75 128 1000 41.96 42.00 41.98 256 1000 63.95 64.04 64.00 512 1000 92.88 92.92 92.90 1024 1000 49.37 49.42 49.39 2048 1000 69.78 69.80 69.79 4096 1000 114.19 114.22 114.21 8192 1000 190.72 190.72 190.72 16384 1000 460.00 460.06 460.02 32768 1000 802.38 802.46 802.42 65536 640 1604.34 1604.81 1604.62 131072 320 3202.88 3204.05 3203.46 262144 160 9901.68 9905.28 9903.50 524288 80 19099.49 19099.97 19099.64 1048576 40 32180.12 32183.12 32181.66 2097152 20 60705.70 60708.09 60706.79 4194304 10 116257.10 116260.22 116259.08 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 55.08 55.10 55.09 8 1000 55.26 55.32 55.29 16 1000 54.20 54.22 54.21 32 1000 56.97 57.04 57.00 64 1000 71.16 71.18 71.17 128 1000 83.62 83.64 83.63 256 1000 77.37 77.42 77.40 512 1000 74.78 74.84 74.81 1024 1000 77.20 77.22 77.21 2048 1000 106.96 107.05 107.02 4096 1000 186.58 186.65 186.61 8192 1000 337.92 337.93 337.93 16384 1000 959.20 959.41 959.33 32768 1000 1876.71 1876.86 1876.78 65536 640 3743.86 3744.44 3744.15 131072 320 7478.50 7479.81 7479.15 262144 160 14947.32 14952.84 14950.09 524288 80 40093.09 40110.42 40101.40 1048576 40 72643.23 72645.52 72644.37 2097152 20 137533.15 137536.39 137534.97 4194304 10 267813.71 267820.41 267818.20 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 18.23 18.27 18.25 8 1000 18.18 18.22 18.20 16 1000 18.20 18.23 18.21 32 1000 18.71 18.76 18.74 64 1000 19.12 19.15 19.14 128 1000 29.11 29.17 29.14 256 1000 24.86 24.89 24.88 512 1000 28.73 28.78 28.76 1024 1000 35.78 35.81 35.79 2048 1000 2561.16 2561.21 2561.19 4096 1000 75.88 75.90 75.89 8192 1000 110.02 110.14 110.08 16384 1000 178.83 178.93 178.88 32768 1000 316.05 316.37 316.21 65536 640 582.60 583.48 583.04 131072 320 1267.30 1267.30 1267.30 262144 160 2532.69 2532.77 2532.73 524288 80 5086.89 5087.09 5086.99 1048576 40 10163.18 10163.78 10163.48 2097152 20 20498.80 20500.40 20499.60 4194304 10 41582.61 41586.09 41584.35 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 19.89 19.96 19.92 8 1000 20.13 20.22 20.18 16 1000 20.07 20.10 20.08 32 1000 20.56 20.60 20.58 64 1000 21.11 21.21 21.16 128 1000 25.27 25.33 25.30 256 1000 28.51 28.57 28.54 512 1000 32.75 32.82 32.79 1024 1000 38.43 38.52 38.47 2048 1000 924.11 924.22 924.17 4096 1000 82.41 82.57 82.50 8192 1000 118.60 118.82 118.72 16384 1000 192.60 192.96 192.79 32768 1000 947.72 948.64 948.18 65536 640 1219.90 1221.72 1220.91 131072 320 2179.34 2185.91 2182.89 262144 160 4294.72 4320.86 4308.22 524288 80 8488.44 8592.46 8540.87 1048576 40 27016.90 27089.83 27067.01 2097152 20 55765.90 55913.35 55867.82 4194304 10 94813.59 95111.39 95016.23 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 22.18 22.37 22.27 8 1000 23.37 23.53 23.45 16 1000 22.11 22.27 22.19 32 1000 22.34 22.46 22.40 64 1000 24.84 25.03 24.93 128 1000 29.36 29.62 29.48 256 1000 36.06 36.27 36.17 512 1000 36.41 36.70 36.55 1024 1000 46.02 46.35 46.17 2048 1000 64.45 64.65 64.56 4096 1000 72.59 72.87 72.77 8192 1000 107.76 108.13 107.98 16384 1000 1247.11 1248.35 1247.73 32768 1000 2210.76 2212.95 2211.86 65536 640 2495.08 2498.99 2497.18 131072 320 4308.38 4321.82 4315.49 262144 160 8538.99 8592.39 8566.33 524288 80 16924.79 17136.19 17031.13 1048576 40 33381.10 34221.18 33799.57 2097152 20 88924.24 89544.45 89331.48 4194304 10 178744.01 179744.20 179390.08 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 1.58 2.08 1.83 8 1000 36.38 36.42 36.40 16 1000 36.31 36.35 36.33 32 1000 36.67 36.70 36.69 64 1000 37.60 37.64 37.62 128 1000 41.68 41.71 41.69 256 1000 47.24 47.29 47.26 512 1000 53.48 53.51 53.49 1024 1000 63.99 64.04 64.02 2048 1000 84.64 84.69 84.66 4096 1000 123.08 123.13 123.10 8192 1000 182.90 182.98 182.94 16384 1000 285.92 286.04 285.98 32768 1000 491.11 491.29 491.20 65536 640 892.60 893.08 892.84 131072 320 2279.02 2279.29 2279.15 262144 160 3851.34 3851.39 3851.37 524288 80 7607.13 7607.17 7607.15 1048576 40 14551.85 14551.88 14551.86 2097152 20 29569.95 29570.40 29570.17 4194304 10 59600.09 59619.31 59609.70 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 1.54 16.49 9.50 8 1000 2.46 521.70 387.79 16 1000 75.14 75.19 75.16 32 1000 72.40 72.41 72.40 64 1000 76.52 76.53 76.53 128 1000 93.45 93.45 93.45 256 1000 114.17 114.18 114.17 512 1000 120.88 120.89 120.89 1024 1000 144.25 144.27 144.26 2048 1000 237.47 237.51 237.49 4096 1000 124.57 124.60 124.59 8192 1000 182.32 182.38 182.35 16384 1000 299.08 299.16 299.11 32768 1000 1122.52 1122.67 1122.58 65536 640 1529.43 1529.87 1529.60 131072 320 2761.16 2762.68 2761.72 262144 160 7084.51 7088.81 7086.68 524288 80 11680.64 11695.89 11688.37 1048576 40 31946.18 32004.37 31975.80 2097152 20 64571.00 64800.16 64688.28 4194304 10 103722.69 104654.60 104193.02 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.03 0.02 4 1000 2.00 1030.59 703.00 8 1000 504.76 546.75 517.52 16 1000 1.96 94.37 51.69 32 1000 94.30 94.32 94.31 64 1000 92.88 92.92 92.90 128 1000 111.41 111.44 111.43 256 1000 489.52 489.54 489.53 512 1000 165.73 165.77 165.75 1024 1000 107.23 107.28 107.25 2048 1000 292.46 292.54 292.50 4096 1000 174.64 174.75 174.70 8192 1000 183.81 183.85 183.83 16384 1000 1346.82 1346.94 1346.87 32768 1000 2386.22 2386.47 2386.33 65536 640 2805.57 2806.11 2805.77 131072 320 4895.44 4896.98 4895.93 262144 160 9696.66 9703.70 9699.03 524288 80 21770.46 21795.06 21782.76 1048576 40 40701.85 40793.53 40748.27 2097152 20 102566.79 102917.96 102744.47 4194304 10 196802.12 198197.60 197498.22 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 21.59 21.60 21.60 2 1000 21.22 21.25 21.23 4 1000 21.04 21.06 21.05 8 1000 21.22 21.26 21.24 16 1000 21.11 21.15 21.13 32 1000 22.42 22.47 22.44 64 1000 23.54 23.54 23.54 128 1000 31.41 31.44 31.42 256 1000 36.49 36.51 36.50 512 1000 41.67 41.70 41.69 1024 1000 36.60 36.63 36.62 2048 1000 50.88 50.91 50.90 4096 1000 75.93 75.93 75.93 8192 1000 108.86 108.89 108.87 16384 1000 176.38 176.45 176.41 32768 1000 311.64 311.70 311.67 65536 640 978.93 978.97 978.95 131072 320 1671.56 1671.71 1671.63 262144 160 2614.44 2614.51 2614.48 524288 80 4736.18 4736.75 4736.46 1048576 40 9427.42 9427.45 9427.44 2097152 20 19252.49 19255.35 19253.92 4194304 10 38808.01 38815.52 38811.77 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 26.38 26.40 26.39 2 1000 26.42 26.44 26.43 4 1000 29.17 29.20 29.18 8 1000 26.75 26.80 26.78 16 1000 27.08 27.13 27.10 32 1000 27.11 27.16 27.14 64 1000 28.83 28.84 28.83 128 1000 41.99 42.05 42.02 256 1000 60.64 60.69 60.67 512 1000 90.16 90.20 90.18 1024 1000 52.00 52.01 52.01 2048 1000 70.50 70.55 70.52 4096 1000 121.88 121.91 121.89 8192 1000 194.21 194.22 194.21 16384 1000 800.13 800.42 800.28 32768 1000 2016.03 2016.56 2016.40 65536 640 4097.43 4097.49 4097.45 131072 320 7855.46 7855.61 7855.56 262144 160 14826.77 14827.09 14826.93 524288 80 34594.53 34595.14 34594.92 1048576 40 54784.27 54787.20 54785.73 2097152 20 108033.10 108037.85 108035.38 4194304 10 214608.31 214612.10 214610.11 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 53.89 53.96 53.92 2 1000 53.52 53.60 53.56 4 1000 54.07 54.07 54.07 8 1000 54.48 54.55 54.52 16 1000 58.63 58.64 58.64 32 1000 65.67 65.74 65.71 64 1000 70.38 70.45 70.41 128 1000 84.35 84.36 84.35 256 1000 73.18 73.25 73.21 512 1000 78.31 78.32 78.31 1024 1000 83.87 83.94 83.91 2048 1000 117.10 117.24 117.17 4096 1000 205.82 205.86 205.84 8192 1000 1866.91 1867.36 1867.14 16384 1000 3733.79 3734.63 3734.21 32768 1000 8298.98 8299.57 8299.39 65536 640 16918.17 16918.26 16918.23 131072 320 32026.89 32027.10 32027.00 262144 160 68252.11 68252.87 68252.52 524288 80 123417.73 123418.56 123418.11 1048576 40 244130.90 244133.28 244132.21 2097152 20 485337.89 485340.01 485338.78 4194304 10 969159.60 969162.30 969160.87 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 37.35 37.38 37.37 2 1000 42.94 42.99 42.96 4 1000 37.43 37.48 37.46 8 1000 37.31 37.36 37.34 16 1000 41.23 41.28 41.26 32 1000 38.59 38.59 38.59 64 1000 42.33 42.36 42.35 128 1000 48.11 48.15 48.13 256 1000 54.20 54.24 54.22 512 1000 64.76 64.81 64.79 1024 1000 85.24 85.29 85.26 2048 1000 123.17 123.23 123.20 4096 1000 182.71 182.79 182.75 8192 1000 282.70 282.89 282.79 16384 1000 482.40 482.72 482.56 32768 1000 923.12 923.13 923.13 65536 640 1793.88 1793.90 1793.89 131072 320 3414.68 3414.68 3414.68 262144 160 6647.93 6648.04 6647.98 524288 80 13152.92 13152.97 13152.95 1048576 40 26104.92 26105.05 26104.98 2097152 20 52013.25 52014.30 52013.78 4194304 10 104502.70 104503.39 104503.05 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 1 1000 425.30 425.30 425.30 2 1000 391.16 391.16 391.16 4 1000 493.09 493.11 493.10 8 1000 539.96 539.97 539.97 16 1000 466.26 466.26 466.26 32 1000 49.83 49.84 49.84 64 1000 52.97 52.98 52.98 128 1000 70.37 70.42 70.40 256 1000 102.26 102.32 102.29 512 1000 204.08 204.26 204.17 1024 1000 299.49 299.74 299.61 2048 1000 373.02 373.36 373.19 4096 1000 598.29 598.87 598.58 8192 1000 1031.39 1032.33 1031.87 16384 1000 8133.11 8134.40 8133.91 32768 1000 7383.01 7385.30 7384.42 65536 640 7377.39 7384.30 7381.68 131072 320 14322.71 14350.10 14339.82 262144 160 28099.45 28207.47 28167.05 524288 80 55546.04 55974.51 55813.47 1048576 40 110004.60 111709.70 111069.33 2097152 20 217014.00 223825.80 221270.35 4194304 10 421749.40 448979.40 438767.42 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.03 0.02 1 1000 116.98 117.03 117.01 2 1000 107.12 107.18 107.15 4 1000 220.48 220.51 220.50 8 1000 169.39 169.44 169.42 16 1000 68.46 68.51 68.48 32 1000 71.07 71.08 71.08 64 1000 78.42 78.46 78.44 128 1000 112.51 112.52 112.51 256 1000 645.29 645.60 645.44 512 1000 1075.85 1076.39 1076.12 1024 1000 809.83 810.52 810.18 2048 1000 1272.99 1274.15 1273.57 4096 1000 2229.84 2231.97 2230.91 8192 1000 4673.59 4677.48 4675.74 16384 1000 8630.66 8637.73 8634.63 32768 1000 16216.26 16229.71 16223.80 65536 640 31310.86 31351.51 31333.71 131072 320 61586.12 61747.43 61676.70 262144 160 121730.57 122371.63 122091.22 524288 80 241506.75 244064.56 242945.18 1048576 40 477230.90 487449.55 482978.81 2097152 20 935483.05 976359.20 958478.92 4194304 10 1792470.62 1955854.99 1884370.41 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 21.55 21.58 21.56 2 1000 21.34 21.36 21.35 4 1000 20.95 20.97 20.96 8 1000 21.29 21.31 21.30 16 1000 21.22 21.25 21.23 32 1000 21.67 21.67 21.67 64 1000 23.09 23.10 23.09 128 1000 30.80 30.83 30.81 256 1000 36.72 36.73 36.72 512 1000 41.49 41.49 41.49 1024 1000 36.54 36.61 36.58 2048 1000 50.53 50.58 50.55 4096 1000 76.32 76.35 76.33 8192 1000 108.92 108.93 108.93 16384 1000 176.13 176.16 176.15 32768 1000 311.94 311.98 311.96 65536 640 777.15 777.16 777.16 131072 320 1481.08 1481.20 1481.14 262144 160 2586.79 2587.02 2586.91 524288 80 4670.66 4670.77 4670.72 1048576 40 9337.13 9337.43 9337.28 2097152 20 18286.60 18286.65 18286.62 4194304 10 37845.40 37852.31 37848.85 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 37.47 37.51 37.49 2 1000 35.75 35.77 35.76 4 1000 37.75 37.79 37.77 8 1000 36.55 36.57 36.56 16 1000 36.65 36.66 36.65 32 1000 38.40 38.47 38.44 64 1000 43.76 43.82 43.80 128 1000 63.80 63.85 63.84 256 1000 69.43 69.48 69.46 512 1000 66.60 66.65 66.63 1024 1000 57.32 57.35 57.34 2048 1000 84.18 84.25 84.21 4096 1000 188.99 189.05 189.02 8192 1000 327.42 327.48 327.45 16384 1000 596.90 597.01 596.96 32768 1000 1135.64 1135.70 1135.67 65536 640 2902.94 2903.02 2902.99 131072 320 5811.52 5811.76 5811.65 262144 160 10296.50 10297.04 10296.77 524288 80 20909.51 20910.05 20909.74 1048576 40 39421.27 39444.63 39433.02 2097152 20 76515.95 76568.45 76541.56 4194304 10 150053.50 150172.00 150110.50 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 57.52 57.54 57.53 2 1000 53.41 53.47 53.44 4 1000 53.92 53.95 53.94 8 1000 56.27 56.30 56.29 16 1000 55.08 55.13 55.10 32 1000 58.35 58.38 58.35 64 1000 59.38 59.51 59.46 128 1000 78.24 78.28 78.26 256 1000 83.96 84.02 84.00 512 1000 97.41 97.48 97.44 1024 1000 143.81 143.94 143.89 2048 1000 272.70 272.84 272.76 4096 1000 558.58 558.73 558.65 8192 1000 1107.82 1108.06 1107.96 16384 1000 2166.38 2166.80 2166.65 32768 1000 4286.92 4287.74 4287.48 65536 640 11117.69 11121.89 11120.35 131072 320 20911.23 20911.54 20911.41 262144 160 40738.67 40739.93 40739.50 524288 80 77572.66 77575.19 77574.32 1048576 40 151530.67 151535.58 151533.73 2097152 20 294800.90 294995.55 294922.44 4194304 10 578363.01 578818.30 578643.98 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 18.37 18.38 18.37 2 1000 18.33 18.35 18.34 4 1000 18.34 18.35 18.34 8 1000 18.40 18.40 18.40 16 1000 18.31 18.32 18.31 32 1000 18.62 18.66 18.64 64 1000 21.50 21.50 21.50 128 1000 22.76 22.81 22.78 256 1000 24.97 24.98 24.97 512 1000 28.80 28.82 28.81 1024 1000 35.59 35.59 35.59 2048 1000 48.75 48.79 48.77 4096 1000 63.56 63.60 63.58 8192 1000 104.51 104.60 104.56 16384 1000 171.00 171.12 171.06 32768 1000 308.63 308.90 308.76 65536 640 583.98 584.74 584.36 131072 320 1192.07 1192.14 1192.10 262144 160 2286.96 2287.27 2287.11 524288 80 4716.63 4716.96 4716.79 1048576 40 9461.85 9462.48 9462.16 2097152 20 18967.15 18968.80 18967.97 4194304 10 38080.10 38081.79 38080.94 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 2130.47 2130.51 2130.49 2 1000 1626.73 1626.77 1626.74 4 1000 3119.17 3119.21 3119.19 8 1000 28.42 28.46 28.44 16 1000 4653.32 4653.38 4653.35 32 1000 28.05 28.08 28.06 64 1000 38.09 38.13 38.11 128 1000 24.72 24.77 24.74 256 1000 27.31 27.35 27.33 512 1000 30.93 31.01 30.97 1024 1000 37.70 37.74 37.72 2048 1000 53.34 53.41 53.39 4096 1000 80.90 81.00 80.97 8192 1000 136.06 136.24 136.16 16384 1000 239.76 239.98 239.89 32768 1000 441.55 441.95 441.80 65536 640 836.90 838.00 837.63 131072 320 3886.38 3888.23 3887.29 262144 160 26307.83 26315.30 26313.21 524288 80 44822.60 44826.64 44825.33 1048576 40 63834.90 63845.45 63841.79 2097152 20 41929.75 42078.65 42026.15 4194304 10 82531.40 82774.40 82683.13 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 96.50 96.55 96.52 2 1000 24.54 24.56 24.55 4 1000 25.43 25.45 25.44 8 1000 24.63 24.65 24.64 16 1000 25.70 25.71 25.70 32 1000 26.90 26.96 26.93 64 1000 23.33 23.36 23.34 128 1000 26.37 26.46 26.42 256 1000 28.94 28.99 28.97 512 1000 33.13 33.17 33.15 1024 1000 40.04 40.11 40.07 2048 1000 71.72 71.85 71.77 4096 1000 102.58 102.72 102.64 8192 1000 176.25 176.52 176.42 16384 1000 319.65 320.03 319.90 32768 1000 608.89 609.48 609.28 65536 640 1254.89 1256.69 1256.09 131072 320 6116.50 6119.84 6118.62 262144 160 5816.86 5817.15 5817.09 524288 80 11340.74 11345.26 11344.28 1048576 40 21955.53 21976.77 21973.23 2097152 20 42916.69 42920.05 42918.14 4194304 10 149752.12 151218.61 150717.78 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 20.22 20.22 20.22 #--------------------------------------------------- # Benchmarking Barrier # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 27.17 27.19 27.18 #--------------------------------------------------- # Benchmarking Barrier # #processes = 8 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 55.11 55.13 55.12