本文展示开源 C 库 bfdev 的大数运算模块。我们将分别在 x86 Linux 平台和 py32f030 为代表的嵌入式平台上分别进行演示,并进行性能测试。
关于 bfdev 库,这是一个开源的 C 语言算法库, 它具有:良好的可移植性,面向对象的方法设计、安装部署简单等等优势。
MPI 即 Multi precision integer (多精度整数),就是对很大的数进行一系列的运算。在数学中,数的大小是没有上限的,但是在计算机中,由于受 ALU 字长的限制,处理器无法对其进行直接计算,我们就需要用到多精度整数算法。
我们先给出两个平台基于 MPI 使用 machin 公式 计算圆周率的测试代码,再进行说明。
#define MODULE_NAME "mpi-machin"
#define bfdev_log_fmt(fmt) MODULE_NAME ": " fmt
#include <stdio.h>
#include <bfdev/mpi.h>
#include <bfdev/log.h>
#include "../time.h"
#include "helper.h"
#define TEST_LEN 10000
#define TEST_SIZE (TEST_LEN / 4 + 1)
#define TEST_LOOP (TEST_LEN / 1.39793 + 1)
#define PRINT_RESULT 1
int main(int argc, const char *argv[])
{
bfdev_mpi_t *vw, *vs, *vv, *vq;
unsigned int k;
int retval;
if (!((vw = bfdev_mpi_create(NULL)) &&
(vs = bfdev_mpi_create(NULL)) &&
(vv = bfdev_mpi_create(NULL)) &&
(vq = bfdev_mpi_create(NULL))))
return 1;
/**
* Machin-like formula:
* PI = 16arctan(1/5) - 4arctan(1/239)
*
* These formulas are used in conjunction with Gregory's
* series, the Taylor series expansion for arctangent:
* arctan(x) = x - (x^3)/3 + (x^5)/5 - (x^7)/7 + ...
*/
if ((retval = bfdev_mpi_seti(vw, 16 * 5)) ||
(retval = bfdev_mpi_seti(vv, 4 * 239)) ||
(retval = bfdev_mpi_seti(vq, 10000)))
return retval;
for (k = 0; k < TEST_SIZE; ++k) {
if ((retval = bfdev_mpi_mul(vw, vw, vq)) ||
(retval = bfdev_mpi_mul(vv, vv, vq)))
return retval;
}
bfdev_log_info("Convergence Machin %d:\n", TEST_LEN);
EXAMPLE_TIME_STATISTICAL(
for (k = 1; k <= TEST_LOOP; ++k) {
if ((retval = bfdev_mpi_divi(vw, vw, vw, 5 * 5)) ||
(retval = bfdev_mpi_divi(vv, vv, vv, 239 * 239)) ||
(retval = bfdev_mpi_sub(vq, vw, vv)) ||
(retval = bfdev_mpi_divi(vq, vq, vq, 2 * k - 1)))
return retval;
if (k & 1)
retval = bfdev_mpi_add(vs, vs, vq);
else
retval = bfdev_mpi_sub(vs, vs, vq);
if (retval)
return retval;
}
0;
);
#if PRINT_RESULT
char *result;
result = print_num(vs, 10);
if (!result)
return 1;
printf("%c.", *result);
puts(result + 1);
free(result);
#endif
bfdev_mpi_destory(vw);
bfdev_mpi_destory(vs);
bfdev_mpi_destory(vv);
bfdev_mpi_destory(vq);
return 0;
}
以下是运算 pi 后 10000 位的结果,共使用了 0.36 秒。
❯ ./build/examples/mpi/mpi-machin
[info] mpi-machin: Convergence Machin 10000:
[info] mpi-machin: real time: 0.360000
[info] mpi-machin: user time: 0.350000
[info] mpi-machin: kern time: 0.000000
3.14159
...
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <bfdev.h>
#include "main.h"
#include "py32f0xx_hal.h"
#define TEST_LEN 100
#define TEST_SIZE (TEST_LEN / 4 + 1)
#define TEST_LOOP (TEST_LEN / 1.39793 + 1)
int mpi_benchmark(void)
{
uint32_t start, time;
bfdev_mpi_t *vw, *vs, *vv, *vq;
unsigned int k;
int retval;
if (!((vw = bfdev_mpi_create(NULL)) &&
(vs = bfdev_mpi_create(NULL)) &&
(vv = bfdev_mpi_create(NULL)) &&
(vq = bfdev_mpi_create(NULL))))
return 1;
if ((retval = bfdev_mpi_seti(vw, 16 * 5)) ||
(retval = bfdev_mpi_seti(vv, 4 * 239)) ||
(retval = bfdev_mpi_seti(vq, 10000)))
return retval;
for (k = 0; k < TEST_SIZE; ++k) {
if ((retval = bfdev_mpi_mul(vw, vw, vq)) ||
(retval = bfdev_mpi_mul(vv, vv, vq)))
return retval;
}
bfdev_log_info("Convergence Machin %d:\n", TEST_LEN);
start = HAL_GetTick();
for (k = 1; k <= TEST_LOOP; ++k) {
if ((retval = bfdev_mpi_divi(vw, vw, vw, 5 * 5)) ||
(retval = bfdev_mpi_divi(vv, vv, vv, 239 * 239)) ||
(retval = bfdev_mpi_sub(vq, vw, vv)) ||
(retval = bfdev_mpi_divi(vq, vq, vq, 2 * k - 1)))
return retval;
if (k & 1)
retval = bfdev_mpi_add(vs, vs, vq);
else
retval = bfdev_mpi_sub(vs, vs, vq);
if (retval)
return retval;
iwdg_touch();
}
bfdev_mpi_destory(vw);
bfdev_mpi_destory(vs);
bfdev_mpi_destory(vv);
bfdev_mpi_destory(vq);
iwdg_touch();
time = HAL_GetTick() - start;
bfdev_log_notice("Total time: %lu.%lus\n", time / 1000, time % 1000);
return 0;
}
以下是运算结果,由于 py32f030 只有 4KiB RAM ,我们这里只计算 100 位。
[info] benchmark: Benchmark for PY32F0xx.
[info] benchmark: Bfdev version: 1.0.1-devel
[info] benchmark: This may take a few minutes...
[info] Convergence Machin 100:
[notice] Total time: 0.21s
...
我们计算的大体流程如下:
bfdev 中,使用 MPI 需要引入 <bfdev/mpi.h>
头文件,或者引入 <bfdev.h>
顶层头文件也可以。
这里要重点介绍一下 bfdev_mpi_create(alloc)
函数,它使用了 bfdev 的 allocator 兼容层,传入 NULL 将使用系统默认内存分配器,用户也可以使用自己的内存池为 MPI 分配内存。
欢迎各位感兴趣的读者去访问 bfdev 仓库。