187 changes: 157 additions & 30 deletions tests/test-backend-ops.cpp
@@ -175,14 +175,40 @@ static void init_tensor_kq_mask(ggml_tensor * tensor, float min = -1.0f, float m
ggml_backend_tensor_set(tensor, data_f16.data(), 0, data_f16.size()*sizeof(ggml_fp16_t));
}

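// extract the float value(s) at byte offset i; for quantized types an entire
// block is dequantized at once, so the result holds bs values and the caller is
// expected to advance its innermost index by the block size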
static std::vector<float> ggml_get_float_value(uint8_t * buf, ggml_type type, size_t i, size_t bs,
bool quantized, std::vector<float> & vq) {
const auto * tt = ggml_get_type_traits(type);
std::vector<float> tv;
if (type == GGML_TYPE_F16) {
tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
} else if (type == GGML_TYPE_BF16) {
tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
} else if (type == GGML_TYPE_F32) {
tv.push_back(*(float *) &buf[i]);
} else if (type == GGML_TYPE_I64) {
tv.push_back((float)*(int64_t *) &buf[i]);
} else if (type == GGML_TYPE_I32) {
tv.push_back((float)*(int32_t *) &buf[i]);
} else if (type == GGML_TYPE_I16) {
tv.push_back((float)*(int16_t *) &buf[i]);
} else if (type == GGML_TYPE_I8) {
tv.push_back((float)*(int8_t *) &buf[i]);
} else if (quantized) {
tt->to_float(&buf[i], vq.data(), bs);
tv.insert(tv.end(), vq.begin(), vq.end());
} else {
GGML_ABORT("fatal error");
}
return tv;
}

static std::vector<float> tensor_to_float(const ggml_tensor * t) {
std::vector<float> tv;
tv.reserve(ggml_nelements(t));

std::vector<uint8_t> buf(ggml_nbytes(t));
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));

size_t bs = ggml_blck_size(t->type);
std::vector<float> vq(ggml_blck_size(t->type));
bool quantized = ggml_is_quantized(t->type);
@@ -193,26 +219,8 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) {
size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
const auto fvs = ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq);
tv.insert(tv.end(), fvs.begin(), fvs.end());
}
}
}
@@ -221,6 +229,103 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
return tv;
}
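// how these helpers fit together during MODE_TEST (a minimal sketch, mirroring
// the comparison in test_case::eval() further below):
//
//   std::vector<float> f1 = tensor_to_float(t1); // backend under test
//   std::vector<float> f2 = tensor_to_float(t2); // CPU reference
//   double err = nmse(f1.data(), f2.data(), f1.size());
//   if (err > max_err) { /* test fails */ }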

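// format a tensor's dimensions as a comma-separated string, e.g. "4, 3, 2, 1"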
static std::string ggml_ne_string(const ggml_tensor * t) {
std::string str;
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
str += std::to_string(t->ne[i]);
if (i + 1 < GGML_MAX_DIMS) {
str += ", ";
}
}
return str;
}

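// print a tensor's metadata (name, type, op, source shapes) and its contents,
// truncated to the first/last n entries per dimension, followed by the sum over
// all elements; exits if that sum turns out to be NaN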
static void ggml_print_tensor(ggml_tensor * t, int64_t n = 3) {
GGML_ASSERT(t != nullptr);
GGML_ASSERT(n > 0);

printf("%s: %24s = (%s) %10s(", __func__,
t->name, ggml_type_name(t->type), ggml_op_desc(t));

size_t last_src = 0;
for (size_t i = 0; i < GGML_MAX_SRC; ++i) {
if (t->src[i] != nullptr) {
last_src = i;
}
}
for (size_t i = 0; i < GGML_MAX_SRC; ++i) {
if (t->src[i] != nullptr) {
printf("%s{%s}", t->src[i]->name, ggml_ne_string(t->src[i]).c_str());
}
if (i < last_src) {
printf(", ");
}
}
printf(") = {%s}\n", ggml_ne_string(t).c_str());

std::vector<float> tv;
tv.reserve(ggml_nelements(t));

std::vector<uint8_t> buf(ggml_nbytes(t));
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));

size_t bs = ggml_blck_size(t->type);
std::vector<float> vq(ggml_blck_size(t->type));
bool quantized = ggml_is_quantized(t->type);

float sum = 0;
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) {
size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
for (const auto & val : ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq)) {
sum += val;
}
}
}
}
}
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
printf(" [\n");
for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
if (i2 == n && t->ne[2] > 2*n) {
printf(" ..., \n");
i2 = t->ne[2] - n;
}
printf(" [\n");
for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
if (i1 == n && t->ne[1] > 2*n) {
printf(" ..., \n");
i1 = t->ne[1] - n;
}
printf(" [");
for (int64_t i0 = 0; i0 < t->ne[0]; i0++) {
if (i0 == n && t->ne[0] > 2*n) {
printf("..., ");
i0 = t->ne[0] - n;
}
// compute the offset after the truncation jump so the printed value matches i0
size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
for (const auto & v : ggml_get_float_value(buf.data(), t->type, i, bs, quantized, vq)) {
printf("%12.4f", v);
}
if (i0 < t->ne[0] - 1) printf(", ");
}
printf("],\n");
}
printf(" ],\n");
}
printf(" ]\n");
printf(" sum = %f\n", sum);
}

// TODO: make this abort configurable/optional?
if (std::isnan(sum)) {
printf("encountered NaN - aborting\n");
exit(0);
}
}

// normalized mean squared error = mse(a, b) / mse(a, 0)
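// (identical tensors give 0.0; comparing a against all-zeros gives 1.0, independent of the scale of a)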
static double nmse(const float * a, const float * b, size_t n) {
double mse_a_b = 0.0;
@@ -993,6 +1098,8 @@ static std::unique_ptr<printer> create_printer(output_formats format) {
GGML_ABORT("invalid output format");
}

// test case definition

struct test_case {
virtual ~test_case() {}

Expand Down Expand Up @@ -1071,6 +1178,9 @@ struct test_case {

std::string current_op_name;

// verbosity level for printing tensors: 1 prints truncated tensors, >= 2 prints them in full
int verbose = 0;

void add_sentinel(ggml_context * ctx) {
if (mode == MODE_PERF || mode == MODE_GRAD || mode == MODE_SUPPORT) {
return;
Expand Down Expand Up @@ -1220,13 +1330,15 @@ struct test_case {
// compare
struct callback_userdata {
bool ok;
int verbose;
double max_err;
ggml_backend_t backend1;
ggml_backend_t backend2;
};

callback_userdata ud {
true,
verbose,
max_nmse_err(),
backend1,
backend2
@@ -1251,6 +1363,11 @@ struct test_case {
}
}

if (ud->verbose) {
ggml_print_tensor(t1, ud->verbose >= 2 ? 1e10 : 3);
ggml_print_tensor(t2, ud->verbose >= 2 ? 1e10 : 3);
}

std::vector<float> f1 = tensor_to_float(t1);
std::vector<float> f2 = tensor_to_float(t2);

@@ -1280,11 +1397,12 @@ struct test_case {
double err = nmse(f1.data(), f2.data(), f1.size());
if (err > ud->max_err) {
printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err);
if (ud->verbose) {
for (int i = 0; i < (int) f1.size(); i++) {
printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
}
printf("\n");
}
ud->ok = false;
}
return true;
@@ -6193,7 +6311,7 @@ static const ggml_type other_types[] = {
};

// Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low
static std::vector<std::unique_ptr<test_case>> make_test_cases_eval(int verbose = 0) {
std::vector<std::unique_ptr<test_case>> test_cases;
std::default_random_engine rng(0);

@@ -7329,6 +7447,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_falcon(2));
#endif

// set verbose on all test cases
for (auto & tc : test_cases) {
tc->verbose = verbose;
}

return test_cases;
}

@@ -7493,7 +7616,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
}

static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_names_filter, const char * params_filter,
printer * output_printer, int verbose) {
auto filter_test_cases = [](std::vector<std::unique_ptr<test_case>> & test_cases, const char * params_filter) {
if (params_filter == nullptr) {
return;
@@ -7512,7 +7635,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
};

if (mode == MODE_TEST) {
auto test_cases = make_test_cases_eval(verbose);
filter_test_cases(test_cases, params_filter);
ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
if (backend_cpu == NULL) {
@@ -7701,6 +7824,7 @@ static void usage(char ** argv) {
printf(" --output specifies output format (default: console, options: console, sql, csv)\n");
printf(" --list-ops lists all available GGML operations\n");
printf(" --show-coverage shows test coverage\n");
printf(" --verbose | -v print tensors during ops (can specify multiple times)\n");
}
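// example invocations (hypothetical binary path); the flag is matched with
// strcmp, so it must be repeated as separate arguments rather than combined as -vv:
//
//   ./build/bin/test-backend-ops test -v        // print first/last 3 values along each dimension
//   ./build/bin/test-backend-ops test -v -v     // print tensors in full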

int main(int argc, char ** argv) {
Expand All @@ -7709,6 +7833,7 @@ int main(int argc, char ** argv) {
const char * op_names_filter = nullptr;
const char * backend_filter = nullptr;
const char * params_filter = nullptr;
int verbose = 0;

for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "test") == 0) {
Expand Down Expand Up @@ -7756,6 +7881,8 @@ int main(int argc, char ** argv) {
} else if (strcmp(argv[i], "--show-coverage") == 0) {
show_test_coverage();
return 0;
} else if (strcmp(argv[i], "--verbose") == 0 || strcmp(argv[i], "-v") == 0) {
++verbose;
} else {
usage(argv);
return 1;
Expand Down Expand Up @@ -7808,7 +7935,7 @@ int main(int argc, char ** argv) {
false, "", ggml_backend_dev_description(dev),
total / 1024 / 1024, free / 1024 / 1024, true));

bool ok = test_backend(backend, mode, op_names_filter, params_filter, output_printer.get(), verbose);

if (ok) {
n_ok++;