1 | use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion}; |
---|---|

2 | use half::prelude::*; |

3 | use std::{f32, f64, iter}; |

4 | |

/// Element count of the "large" inputs and output buffers used by the
/// slice-conversion benchmarks in this file.
const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024;

6 | |

7 | fn bench_f32_to_f16(c: &mut Criterion) { |

8 | let mut group = c.benchmark_group("Convert f16 From f32"); |

9 | for val in &[ |

10 | 0., |

11 | -0., |

12 | 1., |

13 | f32::MIN, |

14 | f32::MAX, |

15 | f32::MIN_POSITIVE, |

16 | f32::NEG_INFINITY, |

17 | f32::INFINITY, |

18 | f32::NAN, |

19 | f32::consts::E, |

20 | f32::consts::PI, |

21 | ] { |

22 | group.bench_with_input(BenchmarkId::new("f16::from_f32", val), val, |b, i| { |

23 | b.iter(|| f16::from_f32(*i)) |

24 | }); |

25 | } |

26 | } |

27 | |

28 | fn bench_f64_to_f16(c: &mut Criterion) { |

29 | let mut group = c.benchmark_group("Convert f16 From f64"); |

30 | for val in &[ |

31 | 0., |

32 | -0., |

33 | 1., |

34 | f64::MIN, |

35 | f64::MAX, |

36 | f64::MIN_POSITIVE, |

37 | f64::NEG_INFINITY, |

38 | f64::INFINITY, |

39 | f64::NAN, |

40 | f64::consts::E, |

41 | f64::consts::PI, |

42 | ] { |

43 | group.bench_with_input(BenchmarkId::new("f16::from_f64", val), val, |b, i| { |

44 | b.iter(|| f16::from_f64(*i)) |

45 | }); |

46 | } |

47 | } |

48 | |

49 | fn bench_f16_to_f32(c: &mut Criterion) { |

50 | let mut group = c.benchmark_group("Convert f16 to f32"); |

51 | for val in &[ |

52 | f16::ZERO, |

53 | f16::NEG_ZERO, |

54 | f16::ONE, |

55 | f16::MIN, |

56 | f16::MAX, |

57 | f16::MIN_POSITIVE, |

58 | f16::NEG_INFINITY, |

59 | f16::INFINITY, |

60 | f16::NAN, |

61 | f16::E, |

62 | f16::PI, |

63 | ] { |

64 | group.bench_with_input(BenchmarkId::new("f16::to_f32", val), val, |b, i| { |

65 | b.iter(|| i.to_f32()) |

66 | }); |

67 | } |

68 | } |

69 | |

70 | fn bench_f16_to_f64(c: &mut Criterion) { |

71 | let mut group = c.benchmark_group("Convert f16 to f64"); |

72 | for val in &[ |

73 | f16::ZERO, |

74 | f16::NEG_ZERO, |

75 | f16::ONE, |

76 | f16::MIN, |

77 | f16::MAX, |

78 | f16::MIN_POSITIVE, |

79 | f16::NEG_INFINITY, |

80 | f16::INFINITY, |

81 | f16::NAN, |

82 | f16::E, |

83 | f16::PI, |

84 | ] { |

85 | group.bench_with_input(BenchmarkId::new("f16::to_f64", val), val, |b, i| { |

86 | b.iter(|| i.to_f64()) |

87 | }); |

88 | } |

89 | } |

90 | |

91 | criterion_group!( |

92 | f16_sisd, |

93 | bench_f32_to_f16, |

94 | bench_f64_to_f16, |

95 | bench_f16_to_f32, |

96 | bench_f16_to_f64 |

97 | ); |

98 | |

99 | fn bench_slice_f32_to_f16(c: &mut Criterion) { |

100 | let mut constant_buffer = [f16::ZERO; 11]; |

101 | let constants = [ |

102 | 0., |

103 | -0., |

104 | 1., |

105 | f32::MIN, |

106 | f32::MAX, |

107 | f32::MIN_POSITIVE, |

108 | f32::NEG_INFINITY, |

109 | f32::INFINITY, |

110 | f32::NAN, |

111 | f32::consts::E, |

112 | f32::consts::PI, |

113 | ]; |

114 | c.bench_function( |

115 | "HalfFloatSliceExt::convert_from_f32_slice/constants", |

116 | |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f32_slice(&constants)), |

117 | ); |

118 | |

119 | let large: Vec<_> = iter::repeat(0) |

120 | .enumerate() |

121 | .map(|(i, _)| i as f32) |

122 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |

123 | .collect(); |

124 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; |

125 | c.bench_function( |

126 | "HalfFloatSliceExt::convert_from_f32_slice/large", |

127 | |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f32_slice(&large)), |

128 | ); |

129 | } |

130 | |

131 | fn bench_slice_f64_to_f16(c: &mut Criterion) { |

132 | let mut constant_buffer = [f16::ZERO; 11]; |

133 | let constants = [ |

134 | 0., |

135 | -0., |

136 | 1., |

137 | f64::MIN, |

138 | f64::MAX, |

139 | f64::MIN_POSITIVE, |

140 | f64::NEG_INFINITY, |

141 | f64::INFINITY, |

142 | f64::NAN, |

143 | f64::consts::E, |

144 | f64::consts::PI, |

145 | ]; |

146 | c.bench_function( |

147 | "HalfFloatSliceExt::convert_from_f64_slice/constants", |

148 | |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f64_slice(&constants)), |

149 | ); |

150 | |

151 | let large: Vec<_> = iter::repeat(0) |

152 | .enumerate() |

153 | .map(|(i, _)| i as f64) |

154 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |

155 | .collect(); |

156 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; |

157 | c.bench_function( |

158 | "HalfFloatSliceExt::convert_from_f64_slice/large", |

159 | |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f64_slice(&large)), |

160 | ); |

161 | } |

162 | |

163 | fn bench_slice_f16_to_f32(c: &mut Criterion) { |

164 | let mut constant_buffer = [0f32; 11]; |

165 | let constants = [ |

166 | f16::ZERO, |

167 | f16::NEG_ZERO, |

168 | f16::ONE, |

169 | f16::MIN, |

170 | f16::MAX, |

171 | f16::MIN_POSITIVE, |

172 | f16::NEG_INFINITY, |

173 | f16::INFINITY, |

174 | f16::NAN, |

175 | f16::E, |

176 | f16::PI, |

177 | ]; |

178 | c.bench_function( |

179 | "HalfFloatSliceExt::convert_to_f32_slice/constants", |

180 | |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f32_slice(&mut constant_buffer)), |

181 | ); |

182 | |

183 | let large: Vec<_> = iter::repeat(0) |

184 | .enumerate() |

185 | .map(|(i, _)| f16::from_f32(i as f32)) |

186 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |

187 | .collect(); |

188 | let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN]; |

189 | c.bench_function( |

190 | "HalfFloatSliceExt::convert_to_f32_slice/large", |

191 | |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f32_slice(&mut large_buffer)), |

192 | ); |

193 | } |

194 | |

195 | fn bench_slice_f16_to_f64(c: &mut Criterion) { |

196 | let mut constant_buffer = [0f64; 11]; |

197 | let constants = [ |

198 | f16::ZERO, |

199 | f16::NEG_ZERO, |

200 | f16::ONE, |

201 | f16::MIN, |

202 | f16::MAX, |

203 | f16::MIN_POSITIVE, |

204 | f16::NEG_INFINITY, |

205 | f16::INFINITY, |

206 | f16::NAN, |

207 | f16::E, |

208 | f16::PI, |

209 | ]; |

210 | c.bench_function( |

211 | "HalfFloatSliceExt::convert_to_f64_slice/constants", |

212 | |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f64_slice(&mut constant_buffer)), |

213 | ); |

214 | |

215 | let large: Vec<_> = iter::repeat(0) |

216 | .enumerate() |

217 | .map(|(i, _)| f16::from_f64(i as f64)) |

218 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |

219 | .collect(); |

220 | let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN]; |

221 | c.bench_function( |

222 | "HalfFloatSliceExt::convert_to_f64_slice/large", |

223 | |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f64_slice(&mut large_buffer)), |

224 | ); |

225 | } |

226 | |

227 | criterion_group!( |

228 | f16_simd, |

229 | bench_slice_f32_to_f16, |

230 | bench_slice_f64_to_f16, |

231 | bench_slice_f16_to_f32, |

232 | bench_slice_f16_to_f64 |

233 | ); |

234 | |

235 | fn bench_f32_to_bf16(c: &mut Criterion) { |

236 | let mut group = c.benchmark_group("Convert bf16 From f32"); |

237 | for val in &[ |

238 | 0., |

239 | -0., |

240 | 1., |

241 | f32::MIN, |

242 | f32::MAX, |

243 | f32::MIN_POSITIVE, |

244 | f32::NEG_INFINITY, |

245 | f32::INFINITY, |

246 | f32::NAN, |

247 | f32::consts::E, |

248 | f32::consts::PI, |

249 | ] { |

250 | group.bench_with_input(BenchmarkId::new("bf16::from_f32", val), val, |b, i| { |

251 | b.iter(|| bf16::from_f32(*i)) |

252 | }); |

253 | } |

254 | } |

255 | |

256 | fn bench_f64_to_bf16(c: &mut Criterion) { |

257 | let mut group = c.benchmark_group("Convert bf16 From f64"); |

258 | for val in &[ |

259 | 0., |

260 | -0., |

261 | 1., |

262 | f64::MIN, |

263 | f64::MAX, |

264 | f64::MIN_POSITIVE, |

265 | f64::NEG_INFINITY, |

266 | f64::INFINITY, |

267 | f64::NAN, |

268 | f64::consts::E, |

269 | f64::consts::PI, |

270 | ] { |

271 | group.bench_with_input(BenchmarkId::new("bf16::from_f64", val), val, |b, i| { |

272 | b.iter(|| bf16::from_f64(*i)) |

273 | }); |

274 | } |

275 | } |

276 | |

277 | fn bench_bf16_to_f32(c: &mut Criterion) { |

278 | let mut group = c.benchmark_group("Convert bf16 to f32"); |

279 | for val in &[ |

280 | bf16::ZERO, |

281 | bf16::NEG_ZERO, |

282 | bf16::ONE, |

283 | bf16::MIN, |

284 | bf16::MAX, |

285 | bf16::MIN_POSITIVE, |

286 | bf16::NEG_INFINITY, |

287 | bf16::INFINITY, |

288 | bf16::NAN, |

289 | bf16::E, |

290 | bf16::PI, |

291 | ] { |

292 | group.bench_with_input(BenchmarkId::new("bf16::to_f32", val), val, |b, i| { |

293 | b.iter(|| i.to_f32()) |

294 | }); |

295 | } |

296 | } |

297 | |

298 | fn bench_bf16_to_f64(c: &mut Criterion) { |

299 | let mut group = c.benchmark_group("Convert bf16 to f64"); |

300 | for val in &[ |

301 | bf16::ZERO, |

302 | bf16::NEG_ZERO, |

303 | bf16::ONE, |

304 | bf16::MIN, |

305 | bf16::MAX, |

306 | bf16::MIN_POSITIVE, |

307 | bf16::NEG_INFINITY, |

308 | bf16::INFINITY, |

309 | bf16::NAN, |

310 | bf16::E, |

311 | bf16::PI, |

312 | ] { |

313 | group.bench_with_input(BenchmarkId::new("bf16::to_f64", val), val, |b, i| { |

314 | b.iter(|| i.to_f64()) |

315 | }); |

316 | } |

317 | } |

318 | |

319 | criterion_group!( |

320 | bf16_sisd, |

321 | bench_f32_to_bf16, |

322 | bench_f64_to_bf16, |

323 | bench_bf16_to_f32, |

324 | bench_bf16_to_f64 |

325 | ); |

326 | |

327 | criterion_main!(f16_sisd, bf16_sisd, f16_simd); |

328 |