Open MPI 1.2.6 BTL TCP (--mca btl_tcp_if_include eth0) Single quad-core Xeon (X5460, 3.16GHz, C1E sleeping disabled) Broadcom bnx2 NICs (rx-usecs=1, IRQ to all cores), no switch #--------------------------------------------------- # Intel (R) MPI Benchmark Suite V2.3, MPI-1 part #--------------------------------------------------- # Date : Thu Jan 22 09:40:36 2009 # Machine : x86_64# System : Linux # Release : 2.6.26-1-amd64 # Version : #1 SMP Mon Dec 15 17:25:36 UTC 2008 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 25.14 0.00 1 1000 25.71 0.04 2 1000 25.68 0.07 4 1000 25.91 0.15 8 1000 25.77 0.30 16 1000 25.86 0.59 32 1000 28.96 1.05 64 1000 29.30 2.08 128 1000 30.02 4.07 256 1000 31.74 7.69 512 1000 35.83 13.63 1024 1000 42.59 22.93 2048 1000 55.59 35.13 4096 1000 82.61 47.29 8192 1000 135.32 57.73 16384 1000 202.41 77.19 32768 1000 352.55 88.64 65536 640 668.09 93.55 131072 320 10377.62 12.05 262144 160 2258.26 110.70 524288 80 4380.79 114.13 1048576 40 8633.40 115.83 2097152 20 17108.25 116.90 4194304 10 34082.70 117.36 #--------------------------------------------------- # Benchmarking PingPing # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 32.60 0.00 1 1000 32.63 0.03 2 1000 32.64 0.06 4 1000 34.28 0.11 8 1000 32.89 0.23 16 1000 34.14 0.45 32 1000 38.47 0.79 64 1000 40.21 1.52 128 1000 38.20 3.20 256 1000 42.47 5.75 512 1000 37.22 13.12 1024 1000 43.74 22.33 2048 1000 57.44 34.01 4096 1000 84.55 46.20 8192 1000 136.38 57.28 16384 1000 207.05 75.46 32768 1000 360.54 86.68 65536 640 701.43 89.10 131072 320 1282.53 97.46 262144 160 2381.25 104.99 524288 80 4661.74 107.26 1048576 40 10313.30 96.96 2097152 20 18501.31 108.10 4194304 10 36663.29 109.10 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 31.00 31.00 31.00 0.00 1 1000 30.65 30.66 30.66 0.06 2 1000 30.58 30.60 30.59 0.12 4 1000 33.67 33.71 33.69 0.23 8 1000 30.77 30.78 30.78 0.50 16 1000 31.16 31.20 31.18 0.98 32 1000 36.44 36.46 36.45 1.67 64 1000 35.70 35.76 35.73 3.41 128 1000 37.73 37.79 37.76 6.46 256 1000 41.85 41.85 41.85 11.67 512 1000 37.51 37.54 37.53 26.02 1024 1000 43.35 43.36 43.36 45.04 2048 1000 57.53 57.55 57.54 67.87 4096 1000 84.29 84.31 84.30 92.66 8192 1000 136.25 136.29 136.27 114.65 16384 1000 206.04 206.10 206.07 151.63 32768 1000 357.97 358.05 358.01 174.56 65536 640 1264.67 1265.59 1265.13 98.77 131072 320 2329.15 2332.66 2330.91 107.17 262144 160 4400.18 4413.69 4406.93 113.28 524288 80 8618.68 8642.09 8630.38 115.71 1048576 40 16963.50 17001.62 16982.56 117.64 2097152 20 29374.66 29466.50 29420.58 135.75 4194304 10 36364.79 36385.99 36375.39 219.86 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 44.06 44.12 44.08 0.00 1 1000 42.88 42.91 42.90 0.04 2 1000 41.03 41.10 41.07 0.09 4 1000 43.93 44.01 43.96 0.17 8 1000 41.37 41.45 41.42 0.37 16 1000 41.34 41.39 41.36 0.74 32 1000 41.09 41.21 41.15 1.48 64 1000 42.59 42.65 42.62 2.86 128 1000 40.69 40.79 40.73 5.99 256 1000 41.14 41.25 41.21 11.84 512 1000 41.32 41.36 41.34 23.61 1024 1000 45.99 46.09 46.04 42.38 2048 1000 58.29 58.41 58.34 66.87 4096 1000 87.76 87.87 87.82 88.90 8192 1000 137.68 137.81 137.75 113.38 16384 1000 266.97 267.14 267.06 116.98 32768 1000 534.01 534.63 534.39 116.90 65536 640 1255.04 1256.13 1255.58 99.51 131072 320 2344.19 2347.78 2346.77 106.48 262144 160 4573.39 4578.04 4575.73 109.22 524288 80 8814.47 8840.50 8827.48 113.12 1048576 40 17996.50 18073.65 18053.19 110.66 2097152 20 35155.95 35204.40 35180.69 113.62 4194304 10 69534.30 69757.51 69619.30 114.68 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 43.58 43.71 43.65 0.00 1 1000 46.23 46.32 46.28 0.04 2 1000 41.59 41.73 41.67 0.09 4 1000 42.83 42.98 42.90 0.18 8 1000 41.62 41.78 41.69 0.37 16 1000 43.12 43.21 43.17 0.71 32 1000 45.04 45.22 45.14 1.35 64 1000 42.33 42.46 42.41 2.87 128 1000 42.97 43.15 43.06 5.66 256 1000 44.99 45.09 45.05 10.83 512 1000 50.66 50.79 50.74 19.23 1024 1000 54.83 54.93 54.88 35.56 2048 1000 70.89 70.97 70.93 55.04 4096 1000 136.21 136.72 136.49 57.14 8192 1000 267.34 267.65 267.50 58.38 16384 1000 533.52 534.32 533.92 58.49 32768 1000 1066.19 1067.14 1066.66 58.57 65536 640 2224.25 2227.75 2226.18 56.11 131072 320 4378.68 4392.39 4386.86 56.92 262144 160 8738.46 8760.74 8750.93 57.07 524288 80 17201.38 17315.25 17274.30 57.75 1048576 40 34638.02 34703.28 34663.46 57.63 2097152 20 68910.10 69037.06 68993.29 57.94 4194304 10 137344.00 137701.49 137470.55 58.10 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 45.76 45.81 45.78 0.00 1 1000 45.05 45.09 45.07 0.08 2 1000 45.01 45.04 45.03 0.17 4 1000 45.60 45.64 45.62 0.33 8 1000 44.67 44.67 44.67 0.68 16 1000 43.41 43.42 43.41 1.41 32 1000 53.40 53.43 53.42 2.28 64 1000 51.05 51.06 51.06 4.78 128 1000 51.11 51.11 51.11 9.55 256 1000 80.72 80.73 80.72 12.10 512 1000 65.50 65.54 65.52 29.80 1024 1000 58.23 58.25 58.24 67.06 2048 1000 76.01 76.03 76.02 102.75 4096 1000 121.91 121.96 121.93 128.12 8192 1000 205.51 205.59 205.55 152.00 16384 1000 340.47 340.53 340.50 183.54 32768 1000 627.98 628.07 628.03 199.02 65536 640 1522.67 1522.80 1522.74 164.17 131072 320 2844.55 2848.06 2846.31 175.56 262144 160 5145.90 5146.16 5146.03 194.32 524288 80 9910.73 9913.85 9912.29 201.74 1048576 40 27216.10 27217.65 27216.88 146.96 2097152 20 40959.00 40969.46 40964.23 195.27 4194304 10 75961.49 75999.31 75980.40 210.53 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 75.86 75.87 75.87 0.00 1 1000 75.00 75.02 75.01 0.05 2 1000 76.32 76.40 76.38 0.10 4 1000 77.44 77.57 77.48 0.20 8 1000 76.64 76.65 76.65 0.40 16 1000 77.47 77.49 77.48 0.79 32 1000 80.07 80.11 80.09 1.52 64 1000 81.50 81.57 81.52 2.99 128 1000 81.74 81.79 81.77 5.97 256 1000 79.25 79.32 79.28 12.31 512 1000 76.30 76.33 76.31 25.59 1024 1000 70.60 70.62 70.61 55.32 2048 1000 91.46 91.51 91.49 85.38 4096 1000 152.71 152.79 152.75 102.26 8192 1000 270.34 270.47 270.41 115.54 16384 1000 533.88 534.10 533.99 117.02 32768 1000 1066.74 1067.02 1066.88 117.15 65536 640 2424.22 2425.12 2424.62 103.09 131072 320 4835.84 4839.18 4837.51 103.32 262144 160 9487.13 9500.45 9491.59 105.26 524288 80 18697.74 18704.95 18701.80 106.92 1048576 40 36536.18 36571.65 36545.48 109.37 2097152 20 70539.90 70646.11 70582.14 113.24 4194304 10 139532.21 139798.19 139688.60 114.45 #----------------------------------------------------------------------------- # Benchmarking Exchange # #processes = 8 #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 58.81 58.87 58.84 0.00 1 1000 60.98 61.04 61.01 0.06 2 1000 60.52 60.57 60.55 0.13 4 1000 59.88 60.00 59.95 0.25 8 1000 60.75 60.87 60.81 0.50 16 1000 62.25 62.33 62.31 0.98 32 1000 68.55 68.62 68.58 1.78 64 1000 67.19 67.32 67.25 3.63 128 1000 67.91 68.03 67.97 7.18 256 1000 68.45 68.58 68.51 14.24 512 1000 68.21 68.40 68.32 28.56 1024 1000 79.70 79.77 79.74 48.97 2048 1000 138.85 138.94 138.89 56.23 4096 1000 269.85 270.05 269.95 57.86 8192 1000 531.68 532.43 532.07 58.69 16384 1000 1067.37 1068.03 1067.73 58.52 32768 1000 2132.52 2135.14 2133.90 58.54 65536 640 4467.62 4470.33 4468.96 55.92 131072 320 8777.23 8787.56 8782.68 56.90 262144 160 17503.21 17514.49 17509.28 57.10 524288 80 34721.26 34800.61 34765.00 57.47 1048576 40 69861.42 69982.90 69953.74 57.16 2097152 20 137876.51 138052.15 137969.82 57.95 4194304 10 274113.70 274391.82 274214.39 58.31 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 33.98 34.04 34.01 8 1000 33.53 33.56 33.54 16 1000 35.05 35.08 35.06 32 1000 36.84 36.84 36.84 64 1000 39.86 39.89 39.87 128 1000 39.45 39.48 39.46 256 1000 45.40 45.43 45.42 512 1000 39.07 39.11 39.09 1024 1000 44.35 44.38 44.37 2048 1000 59.89 59.95 59.92 4096 1000 86.68 86.76 86.72 8192 1000 141.10 141.16 141.13 16384 1000 275.50 275.51 275.50 32768 1000 419.63 419.72 419.67 65536 640 731.59 731.72 731.65 131072 320 2565.67 2567.51 2566.59 262144 160 3729.13 3735.36 3732.25 524288 80 4840.12 4840.42 4840.27 1048576 40 17109.27 17155.98 17132.62 2097152 20 34114.55 34190.40 34152.48 4194304 10 40268.80 40291.19 40280.00 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 47.55 47.62 47.58 8 1000 46.91 46.95 46.93 16 1000 51.50 51.57 51.54 32 1000 58.07 58.09 58.08 64 1000 55.75 55.83 55.79 128 1000 58.73 58.76 58.74 256 1000 75.14 75.21 75.17 512 1000 79.41 79.45 79.43 1024 1000 66.04 66.14 66.09 2048 1000 76.86 76.91 76.89 4096 1000 128.13 128.13 128.13 8192 1000 218.07 218.07 218.07 16384 1000 526.18 526.27 526.23 32768 1000 834.05 834.12 834.08 65536 640 1601.85 1602.13 1601.99 131072 320 3200.69 3201.56 3201.12 262144 160 8225.41 8229.45 8227.42 524288 80 15694.70 15709.99 15702.20 1048576 40 30122.88 30142.93 30132.99 2097152 20 57350.10 57435.60 57394.22 4194304 10 113667.70 113954.90 113865.05 #---------------------------------------------------------------- # Benchmarking Allreduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 76.40 76.42 76.41 8 1000 77.56 77.66 77.61 16 1000 76.56 76.59 76.57 32 1000 69.61 69.68 69.64 64 1000 67.29 67.42 67.37 128 1000 103.57 103.67 103.62 256 1000 74.46 74.48 74.47 512 1000 94.32 94.35 94.33 1024 1000 91.74 91.78 91.76 2048 1000 112.61 112.69 112.65 4096 1000 203.21 203.29 203.25 8192 1000 361.81 361.86 361.84 16384 1000 994.13 994.40 994.25 32768 1000 1909.88 1910.06 1909.96 65536 640 3744.88 3745.36 3745.12 131072 320 7475.51 7476.99 7476.23 262144 160 14940.03 14949.15 14944.57 524288 80 32339.59 32362.06 32350.87 1048576 40 63587.40 63663.90 63623.32 2097152 20 129759.10 129939.65 129857.72 4194304 10 250865.20 251432.09 251204.04 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 4 1000 26.01 26.06 26.04 8 1000 26.14 26.14 26.14 16 1000 26.18 26.20 26.19 32 1000 29.25 29.28 29.27 64 1000 29.60 29.67 29.64 128 1000 30.34 30.39 30.37 256 1000 32.19 32.23 32.21 512 1000 36.29 36.33 36.31 1024 1000 43.45 43.51 43.48 2048 1000 56.84 56.93 56.89 4096 1000 85.16 85.23 85.20 8192 1000 139.36 139.48 139.42 16384 1000 208.50 208.72 208.61 32768 1000 362.37 362.75 362.56 65536 640 627.19 628.11 627.65 131072 320 3874.09 3876.06 3875.07 262144 160 21603.87 21607.94 21605.91 524288 80 48238.40 48246.95 48242.68 1048576 40 61010.60 61025.85 61018.23 2097152 20 132240.05 132273.81 132256.93 4194304 10 245952.49 246015.19 245983.84 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.02 4 1000 30.18 30.31 30.25 8 1000 29.91 29.98 29.95 16 1000 30.14 30.23 30.18 32 1000 33.86 33.95 33.91 64 1000 34.06 34.24 34.16 128 1000 72.38 72.56 72.47 256 1000 37.05 37.17 37.11 512 1000 40.69 40.85 40.77 1024 1000 47.52 48.02 47.77 2048 1000 60.80 60.93 60.88 4096 1000 92.44 92.61 92.51 8192 1000 149.90 150.17 150.02 16384 1000 227.55 227.98 227.79 32768 1000 1082.99 1084.15 1083.57 65536 640 1357.45 1359.46 1358.56 131072 320 2219.09 2225.95 2222.86 262144 160 4326.58 4353.64 4341.94 524288 80 8543.13 8609.63 8582.28 1048576 40 20651.07 20744.97 20706.11 2097152 20 37971.34 38158.40 38079.97 4194304 10 72030.59 72315.00 72204.47 #---------------------------------------------------------------- # Benchmarking Reduce # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 36.82 37.09 36.96 8 1000 36.23 36.46 36.35 16 1000 36.60 36.82 36.71 32 1000 38.64 38.89 38.76 64 1000 39.23 39.48 39.35 128 1000 40.50 40.74 40.62 256 1000 39.89 40.16 40.02 512 1000 44.27 44.58 44.42 1024 1000 51.51 51.84 51.67 2048 1000 86.80 87.03 86.95 4096 1000 79.07 79.38 79.27 8192 1000 120.73 121.18 120.98 16384 1000 1441.09 1442.48 1441.79 32768 1000 2509.74 2512.28 2511.02 65536 640 2795.69 2800.05 2798.03 131072 320 4344.27 4357.84 4351.58 262144 160 8575.18 8628.75 8604.82 524288 80 16998.31 17161.25 17092.96 1048576 40 33691.00 34196.38 34021.32 2097152 20 72119.95 72706.70 72493.19 4194304 10 139237.40 140429.40 139991.72 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 4 1000 3.06 3.09 3.08 8 1000 51.95 51.97 51.96 16 1000 52.10 52.11 52.11 32 1000 55.26 55.28 55.27 64 1000 58.68 58.72 58.70 128 1000 59.73 59.74 59.73 256 1000 62.22 62.28 62.25 512 1000 68.08 68.15 68.12 1024 1000 79.60 79.67 79.64 2048 1000 132.76 132.83 132.79 4096 1000 140.23 140.30 140.27 8192 1000 221.17 221.27 221.22 16384 1000 342.61 342.74 342.68 32768 1000 564.20 564.38 564.29 65536 640 979.94 980.46 980.20 131072 320 13428.80 13430.73 13429.76 262144 160 40506.32 40513.48 40509.90 524288 80 65602.64 65629.68 65616.16 1048576 40 141382.93 141437.92 141410.43 2097152 20 140997.45 141075.69 141036.57 4194304 10 262838.51 263016.10 262927.31 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 4 1000 0.84 10.44 4.39 8 1000 0.79 52.92 26.85 16 1000 118.56 118.62 118.59 32 1000 123.72 123.76 123.74 64 1000 131.27 131.31 131.29 128 1000 128.73 128.77 128.75 256 1000 134.07 134.10 134.09 512 1000 141.68 141.73 141.70 1024 1000 162.57 162.66 162.61 2048 1000 993.40 993.43 993.42 4096 1000 294.76 294.87 294.81 8192 1000 221.36 221.44 221.39 16384 1000 341.83 341.99 341.90 32768 1000 1280.81 1281.02 1280.90 65536 640 1688.94 1689.44 1689.14 131072 320 2839.27 2841.20 2840.03 262144 160 6623.61 6631.03 6627.27 524288 80 42736.06 42764.31 42750.02 1048576 40 26681.08 26785.60 26729.07 2097152 20 47669.35 47957.19 47810.12 4194304 10 94354.10 95281.48 94801.32 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.03 0.02 4 1000 0.85 19.69 5.28 8 1000 0.86 57.88 15.74 16 1000 0.93 115.35 57.99 32 1000 8277.76 8277.78 8277.78 64 1000 8880.25 8907.34 8893.79 128 1000 8199.88 8199.98 8199.93 256 1000 9200.39 9200.42 9200.40 512 1000 6491.06 6491.09 6491.07 1024 1000 210.14 210.20 210.17 2048 1000 825.95 826.01 825.97 4096 1000 380.75 380.81 380.79 8192 1000 235.01 235.09 235.04 16384 1000 1555.97 1556.08 1556.02 32768 1000 2708.66 2708.83 2708.72 65536 640 3148.06 3148.55 3148.25 131072 320 4967.91 4969.97 4968.68 262144 160 9766.37 9773.46 9768.79 524288 80 20185.81 20216.89 20201.21 1048576 40 85859.57 85964.57 85910.76 2097152 20 82262.80 82671.06 82455.14 4194304 10 156557.01 157970.40 157258.31 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 33.06 33.08 33.07 2 1000 33.64 33.65 33.64 4 1000 32.45 32.51 32.48 8 1000 31.69 31.70 31.69 16 1000 33.53 33.53 33.53 32 1000 38.13 38.18 38.15 64 1000 38.62 38.65 38.63 128 1000 39.04 39.10 39.07 256 1000 42.93 42.97 42.95 512 1000 37.98 37.99 37.99 1024 1000 43.69 43.79 43.74 2048 1000 58.98 59.07 59.03 4096 1000 84.47 84.48 84.47 8192 1000 136.86 136.89 136.87 16384 1000 206.94 206.99 206.96 32768 1000 358.43 358.51 358.47 65536 640 1257.08 1258.03 1257.56 131072 320 2337.67 2340.86 2339.26 262144 160 4302.04 4315.63 4308.83 524288 80 7698.44 7722.10 7710.27 1048576 40 16724.50 16764.10 16744.30 2097152 20 19358.99 19365.35 19362.17 4194304 10 39105.42 39135.19 39120.30 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 46.75 46.78 46.76 2 1000 50.92 50.93 50.92 4 1000 46.84 46.85 46.85 8 1000 47.66 47.74 47.70 16 1000 47.51 47.59 47.55 32 1000 55.93 55.95 55.94 64 1000 57.14 57.17 57.16 128 1000 57.06 57.13 57.09 256 1000 73.31 73.32 73.32 512 1000 76.47 76.48 76.47 1024 1000 68.34 68.41 68.37 2048 1000 83.08 83.14 83.11 4096 1000 128.20 128.24 128.22 8192 1000 219.01 219.10 219.06 16384 1000 800.46 800.76 800.60 32768 1000 1835.93 1836.48 1836.21 65536 640 3929.43 3931.32 3930.35 131072 320 7042.79 7049.78 7046.29 262144 160 14660.75 14674.08 14667.41 524288 80 28118.24 28140.79 28129.53 1048576 40 58348.30 58393.05 58368.57 2097152 20 110208.85 110323.55 110277.12 4194304 10 220152.00 220276.50 220217.79 #---------------------------------------------------------------- # Benchmarking Allgather # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 77.56 77.57 77.57 2 1000 77.20 77.29 77.24 4 1000 79.24 79.30 79.27 8 1000 77.01 77.08 77.04 16 1000 78.30 78.33 78.32 32 1000 67.10 67.21 67.16 64 1000 66.79 66.81 66.80 128 1000 65.65 65.69 65.67 256 1000 76.98 77.00 76.99 512 1000 92.04 92.10 92.07 1024 1000 101.97 102.13 102.05 2048 1000 130.26 130.32 130.30 4096 1000 210.47 210.48 210.48 8192 1000 1867.57 1868.08 1867.81 16384 1000 3733.83 3734.81 3734.32 32768 1000 8089.25 8090.34 8089.79 65536 640 16063.84 16071.35 16067.34 131072 320 32504.08 32524.80 32515.56 262144 160 67595.11 67622.49 67611.11 524288 80 137673.69 137818.86 137750.72 1048576 40 284175.18 284960.30 284718.34 2097152 20 538766.60 539062.50 538950.20 4194304 10 1130170.30 1144454.91 1140678.58 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 53.01 53.03 53.02 2 1000 53.11 53.13 53.12 4 1000 53.21 53.21 53.21 8 1000 53.34 53.38 53.36 16 1000 56.50 56.52 56.51 32 1000 59.74 59.79 59.76 64 1000 60.77 60.82 60.79 128 1000 63.34 63.34 63.34 256 1000 69.05 69.10 69.07 512 1000 80.84 80.88 80.86 1024 1000 709.91 709.94 709.93 2048 1000 140.15 140.21 140.18 4096 1000 221.54 221.71 221.62 8192 1000 338.99 339.23 339.11 16384 1000 556.75 557.10 556.92 32768 1000 1026.01 1026.61 1026.31 65536 640 7916.19 7917.90 7917.04 131072 320 4081.38 4087.96 4084.67 262144 160 6627.08 6638.79 6632.94 524288 80 13002.85 13021.28 13012.06 1048576 40 25727.83 25774.43 25751.13 2097152 20 51235.56 51307.06 51271.31 4194304 10 102622.32 102803.02 102712.67 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.02 0.02 1 1000 646.87 646.89 646.88 2 1000 679.39 679.43 679.41 4 1000 363.37 363.42 363.39 8 1000 66.67 66.68 66.68 16 1000 67.33 67.40 67.37 32 1000 78.33 78.37 78.35 64 1000 80.33 80.44 80.39 128 1000 83.02 83.05 83.04 256 1000 137.45 137.52 137.49 512 1000 855.33 855.50 855.42 1024 1000 292.60 292.92 292.77 2048 1000 462.00 462.40 462.20 4096 1000 691.75 692.35 692.05 8192 1000 1186.23 1187.26 1186.75 16384 1000 2363.30 2365.34 2364.34 32768 1000 13420.49 13424.00 13422.27 65536 640 7925.24 7935.70 7930.51 131072 320 15418.13 15451.28 15436.65 262144 160 29583.40 29700.56 29653.10 524288 80 56912.35 57362.94 57185.32 1048576 40 110415.27 112154.92 111488.95 2097152 20 217347.10 224236.00 221619.17 4194304 10 421573.59 448863.98 438579.62 #---------------------------------------------------------------- # Benchmarking Allgatherv # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.02 0.03 0.02 1 1000 371.72 371.76 371.74 2 1000 1599.75 1599.78 1599.77 4 1000 346.29 346.31 346.30 8 1000 785.32 785.35 785.34 16 1000 143.01 143.08 143.04 32 1000 3736.39 3736.51 3736.45 64 1000 3610.99 3651.10 3631.04 128 1000 4818.34 4818.41 4818.37 256 1000 2602.27 2602.76 2602.52 512 1000 622.14 622.79 622.47 1024 1000 1031.60 1032.62 1032.11 2048 1000 1478.85 1480.20 1479.54 4096 1000 2551.06 2553.48 2552.28 8192 1000 5022.54 5027.16 5024.89 16384 1000 25364.76 25373.12 25368.97 32768 1000 16942.56 16958.11 16950.36 65536 640 32760.67 32804.78 32784.19 131072 320 64823.17 64989.53 64914.70 262144 160 125803.38 126456.49 126165.65 524288 80 242283.95 244858.25 243723.96 1048576 40 481793.85 492035.13 487532.87 2097152 20 933428.80 974248.24 956351.40 4194304 10 1791155.79 1954181.60 1882766.14 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 33.52 33.54 33.53 2 1000 32.51 32.53 32.52 4 1000 35.15 35.22 35.18 8 1000 33.52 33.52 33.52 16 1000 32.46 32.46 32.46 32 1000 36.65 36.67 36.66 64 1000 37.85 37.88 37.86 128 1000 38.15 38.16 38.15 256 1000 42.04 42.06 42.05 512 1000 37.49 37.50 37.49 1024 1000 43.70 43.73 43.71 2048 1000 57.81 57.83 57.82 4096 1000 84.70 84.71 84.70 8192 1000 136.94 136.94 136.94 16384 1000 207.02 207.08 207.05 32768 1000 358.50 358.58 358.54 65536 640 1267.64 1268.47 1268.05 131072 320 2334.27 2337.86 2336.07 262144 160 4421.18 4434.69 4427.93 524288 80 5887.50 5910.86 5899.18 1048576 40 17281.31 17320.73 17301.02 2097152 20 19599.15 19606.40 19602.78 4194304 10 38958.72 38967.80 38963.26 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 74.57 74.61 74.59 2 1000 74.99 75.02 75.00 4 1000 75.27 75.30 75.28 8 1000 74.83 74.92 74.90 16 1000 74.42 74.49 74.46 32 1000 79.57 79.58 79.58 64 1000 79.70 79.78 79.75 128 1000 78.01 78.07 78.05 256 1000 77.06 77.14 77.10 512 1000 78.14 78.19 78.16 1024 1000 76.61 76.63 76.62 2048 1000 91.47 91.50 91.48 4096 1000 213.05 213.14 213.09 8192 1000 352.52 352.62 352.57 16384 1000 628.84 628.95 628.90 32768 1000 1223.05 1223.35 1223.20 65536 640 2965.19 2966.95 2965.99 131072 320 5689.43 5692.86 5691.14 262144 160 11444.76 11449.71 11447.23 524288 80 20927.84 20962.41 20945.00 1048576 40 40125.88 40196.57 40174.53 2097152 20 77100.30 77217.30 77153.08 4194304 10 151495.31 151590.90 151555.00 #---------------------------------------------------------------- # Benchmarking Alltoall # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 84.96 85.21 85.08 2 1000 84.92 84.98 84.95 4 1000 87.38 87.52 87.45 8 1000 85.08 85.14 85.11 16 1000 86.51 86.52 86.51 32 1000 94.13 94.32 94.24 64 1000 97.83 97.97 97.94 128 1000 96.61 96.83 96.76 256 1000 103.99 104.04 104.01 512 1000 114.67 114.69 114.68 1024 1000 165.13 165.19 165.16 2048 1000 277.95 278.08 278.01 4096 1000 613.79 613.89 613.84 8192 1000 1102.30 1102.53 1102.42 16384 1000 2179.69 2180.15 2179.94 32768 1000 4357.07 4357.98 4357.68 65536 640 10610.87 10615.36 10613.14 131072 320 21485.20 21499.35 21491.45 262144 160 40601.98 40649.00 40631.95 524288 80 77518.06 77607.86 77570.51 1048576 40 152358.38 152382.05 152373.58 2097152 20 298798.75 299194.90 299074.93 4194304 10 593081.31 594257.00 593855.56 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 26.29 26.29 26.29 2 1000 26.26 26.29 26.28 4 1000 26.33 26.37 26.35 8 1000 26.31 26.37 26.34 16 1000 26.45 26.47 26.46 32 1000 29.50 29.53 29.52 64 1000 29.86 29.90 29.88 128 1000 30.54 30.54 30.54 256 1000 32.27 32.30 32.28 512 1000 36.38 36.41 36.40 1024 1000 43.52 43.54 43.53 2048 1000 52.30 52.37 52.34 4096 1000 74.67 74.73 74.70 8192 1000 113.31 113.41 113.36 16384 1000 199.80 199.94 199.87 32768 1000 335.30 335.58 335.44 65536 640 616.47 617.26 616.86 131072 320 2286.93 2288.76 2287.84 262144 160 3645.64 3652.44 3649.04 524288 80 30282.70 30296.54 30289.62 1048576 40 40811.05 40839.18 40825.11 2097152 20 80478.16 80531.60 80504.88 4194304 10 184483.50 184591.91 184537.71 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.01 0.01 1 1000 57.42 57.47 57.45 2 1000 2244.55 2244.56 2244.56 4 1000 1155.69 1155.77 1155.73 8 1000 1148.28 1148.36 1148.32 16 1000 2012.10 2012.15 2012.12 32 1000 31.48 31.53 31.51 64 1000 31.81 31.91 31.86 128 1000 32.53 32.53 32.53 256 1000 34.31 34.41 34.36 512 1000 37.83 37.92 37.88 1024 1000 44.93 45.02 44.97 2048 1000 65.89 65.98 65.94 4096 1000 92.87 93.02 92.97 8192 1000 147.59 147.73 147.69 16384 1000 264.71 264.97 264.88 32768 1000 477.21 477.57 477.44 65536 640 1107.56 1108.85 1108.32 131072 320 2105.60 2111.09 2108.40 262144 160 3944.93 3965.81 3956.12 524288 80 8572.95 8653.65 8618.15 1048576 40 16956.03 17181.03 17076.76 2097152 20 37781.86 38032.95 37925.25 4194304 10 69371.41 69923.09 69696.33 #---------------------------------------------------------------- # Benchmarking Bcast # #processes = 8 #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.01 0.02 0.01 1 1000 94.68 94.75 94.71 2 1000 1934.72 1934.77 1934.74 4 1000 780.93 781.03 780.98 8 1000 812.48 812.53 812.50 16 1000 968.96 969.04 969.00 32 1000 31.63 31.66 31.65 64 1000 32.15 32.24 32.19 128 1000 32.80 32.85 32.82 256 1000 34.69 34.72 34.70 512 1000 75.80 75.90 75.85 1024 1000 45.26 45.38 45.33 2048 1000 79.30 79.41 79.37 4096 1000 111.91 112.08 112.02 8192 1000 187.36 187.61 187.53 16384 1000 338.89 339.31 339.16 32768 1000 665.48 666.35 666.04 65536 640 1523.05 1527.02 1525.72 131072 320 3067.42 3077.43 3072.45 262144 160 6649.78 6686.11 6671.39 524288 80 15966.59 16136.16 16075.01 1048576 40 32095.40 32583.93 32411.06 2097152 20 66269.70 67963.76 67482.91 4194304 10 132303.71 134647.42 133782.26 #--------------------------------------------------- # Benchmarking Barrier # #processes = 2 # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 31.29 31.30 31.29 #--------------------------------------------------- # Benchmarking Barrier # #processes = 4 # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 49.48 49.49 49.49 #--------------------------------------------------- # Benchmarking Barrier # #processes = 8 #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 76.50 76.58 76.54