| #include "common.h" |
| #include "llama.h" |
| #include "ggml.h" |
|
|
| #ifdef GGML_USE_CUDA |
| #include "ggml-cuda.h" |
| #endif |
|
|
| #ifdef GGML_USE_METAL |
| #include "ggml-metal.h" |
| #endif |
|
|
#include <cmath>
#include <cstdio>
#include <ctime>
#include <random>
#include <string>
#include <vector>
|
|
| #define DEBUG_POS 5 |
|
|
| static void print_debug_tensor(struct ggml_tensor * t, bool with_data = true) { |
| printf("%s: %s (%s): [%d, %d]\n", __func__, t->name, ggml_type_name(t->type), (int) t->ne[0], (int) t->ne[1]); |
| if (!with_data) return; |
| printf("%s: %s[0] = [", __func__, t->name); |
| for (size_t i = 0; i <= DEBUG_POS; i++) { |
| printf(" %f,", ggml_get_f32_nd(t, i, 0, 0, 0)); |
| } |
| printf(" ... ]\n"); |
| } |
|
|
| namespace PCA { |
|
|
| |
// Tunable inputs for the PCA power-iteration routines in this namespace.
struct pca_params {
    int   n_threads    {1};     // thread count handed to the CPU backend
    int   n_batch      {20};    // power-iteration steps unrolled into a single compute graph
    int   n_iterations {1000};  // iteration budget; graphs evaluated = n_iterations / n_batch
    float tolerance    {1e-7};  // distance below which the per-batch scan stops

    // only used for progress output
    int i_layer  {0};
    int n_layers {0};
};
|
|
| |
// Outputs gathered from one evaluated power-iteration graph.
struct pca_result {
    // graph node named "tmp_square" when the graph contained one, otherwise null
    struct ggml_tensor * calculated_square{nullptr};

    // graph nodes named "b_tensor_norm_<i>", stored at index i
    std::vector<struct ggml_tensor *> eigenvectors;

    // values read back from the nodes named "distance_<i>", stored at index i
    std::vector<float> distances;
};
|
|
| struct pca_model { |
| ggml_backend_t backend = NULL; |
| ggml_backend_buffer_t buffer; |
| struct ggml_context * ctx; |
| struct ggml_context * ctx_host; |
|
|
| |
| struct ggml_tensor * dev_input; |
| struct ggml_tensor * dev_square; |
| struct ggml_tensor * dev_eigenvector; |
|
|
| pca_model(struct ggml_tensor * t_input) { |
| #ifdef GGML_USE_CUDA |
| fprintf(stderr, "%s: using CUDA backend\n", __func__); |
| backend = ggml_backend_cuda_init(0); |
| if (!backend) { |
| fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__); |
| } |
| #endif |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| if (!backend) { |
| backend = ggml_backend_cpu_init(); |
| } |
|
|
| const int num_tensors = 4; |
| struct ggml_init_params params { |
| ggml_tensor_overhead() * num_tensors, |
| NULL, |
| true, |
| }; |
| ctx = ggml_init(params); |
|
|
| auto n_samples = t_input->ne[0]; |
| auto n_embd = t_input->ne[1]; |
|
|
| dev_input = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_samples, n_embd); |
| dev_square = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_embd); |
| dev_eigenvector = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd); |
|
|
| ggml_set_name(dev_input, "dev_input"); |
| ggml_set_name(dev_square, "dev_square"); |
| ggml_set_name(dev_eigenvector, "dev_eigenvector"); |
| buffer = ggml_backend_alloc_ctx_tensors(ctx, backend); |
| ggml_backend_tensor_set(dev_input, t_input->data, 0, ggml_nbytes(t_input)); |
|
|
| |
| { |
| std::vector<float> random_vec(ggml_nelements(dev_eigenvector), 0.0); |
| std::default_random_engine generator(static_cast<unsigned int>(std::time(0))); |
| std::uniform_real_distribution<float> distribution(0.0, 1.0); |
| float sum_sqr = 0.0; |
| for (size_t i = 0; i < random_vec.size(); ++i) { |
| float f = distribution(generator); |
| sum_sqr += f * f; |
| random_vec[i] = f; |
| } |
| |
| float random_vec_norm = std::sqrt(sum_sqr); |
| for (size_t i = 0; i < random_vec.size(); ++i) { |
| random_vec[i] /= random_vec_norm; |
| } |
| ggml_backend_tensor_set(dev_eigenvector, random_vec.data(), 0, ggml_nbytes(dev_eigenvector)); |
| } |
| } |
|
|
| ~pca_model() { |
| ggml_free(ctx); |
| ggml_backend_buffer_free(buffer); |
| ggml_backend_free(backend); |
| } |
| }; |
|
|
| static struct ggml_cgraph * build_graph_piter( |
| const struct pca_params & params, |
| const pca_model & model, |
| bool calc_square = false) { |
| GGML_ASSERT(params.n_batch > 0); |
| |
| static size_t buf_size = ggml_tensor_overhead()*GGML_DEFAULT_GRAPH_SIZE + ggml_graph_overhead(); |
| static std::vector<uint8_t> buf(buf_size); |
|
|
| struct ggml_init_params params0 = { |
| buf_size, |
| buf.data(), |
| true, |
| }; |
| |
| struct ggml_context * ctx0 = ggml_init(params0); |
| struct ggml_cgraph * gf = ggml_new_graph(ctx0); |
|
|
| |
| struct ggml_tensor * tmp_square; |
| if (calc_square) { |
| tmp_square = ggml_mul_mat(ctx0, model.dev_input, model.dev_input); |
| ggml_set_name(tmp_square, "tmp_square"); |
| } |
|
|
| struct ggml_tensor * b_tensor; |
| struct ggml_tensor * distance; |
| struct ggml_tensor * old_eigen = model.dev_eigenvector; |
| struct ggml_tensor * input_square = calc_square ? tmp_square : model.dev_square; |
|
|
| for (int i = 0; i < params.n_batch; ++i) { |
| |
| b_tensor = ggml_mul_mat(ctx0, input_square, old_eigen); |
| ggml_set_name(b_tensor, "b_tensor"); |
|
|
| |
| b_tensor = ggml_div_inplace(ctx0, |
| b_tensor, |
| ggml_sqrt_inplace(ctx0, ggml_sum_rows(ctx0, ggml_sqr(ctx0, b_tensor))) |
| ); |
| ggml_format_name(b_tensor, "b_tensor_norm_%d", i); |
|
|
| |
| |
| struct ggml_tensor * new_sub_old = ggml_add(ctx0, old_eigen, ggml_scale(ctx0, b_tensor, -1)); |
| distance = ggml_sqrt_inplace(ctx0, |
| ggml_sum_rows(ctx0, ggml_sqr_inplace(ctx0, new_sub_old))); |
| ggml_format_name(distance, "distance_%d", i); |
|
|
| old_eigen = b_tensor; |
|
|
| |
| ggml_build_forward_expand(gf, distance); |
| } |
|
|
| |
| ggml_free(ctx0); |
| return gf; |
| } |
|
|
| static ggml_status compute_piter( |
| const struct pca_params & params, |
| const pca_model & model, |
| struct ggml_cgraph * gf, |
| ggml_gallocr_t allocr, |
| struct pca_result & result) { |
| |
| ggml_gallocr_alloc_graph(allocr, gf); |
|
|
| if (ggml_backend_is_cpu(model.backend)) { |
| ggml_backend_cpu_set_n_threads(model.backend, params.n_threads); |
| } |
|
|
| ggml_status res = ggml_backend_graph_compute(model.backend, gf); |
| if (res == GGML_STATUS_SUCCESS) { |
| auto extract_i = [](std::string prefix, std::string str) -> int { |
| int i = -1; |
| if (str.rfind(prefix, 0) == 0) { |
| sscanf(str.c_str(), (prefix + "%d").c_str(), &i); |
| } |
| return i; |
| }; |
| result.calculated_square = NULL; |
| result.eigenvectors.clear(); |
| result.distances.clear(); |
| result.eigenvectors.resize(params.n_batch); |
| result.distances.resize(params.n_batch); |
| |
| for (int i = 0; i < ggml_graph_n_nodes(gf); ++i) { |
| auto node = ggml_graph_node(gf, i); |
| int iter = -1; |
| |
| if ((iter = extract_i("b_tensor_norm_", node->name)) > -1) { |
| result.eigenvectors[iter] = node; |
| } |
| |
| if ((iter = extract_i("distance_", node->name)) > -1) { |
| float d; |
| ggml_backend_tensor_get(node, &d, 0, sizeof(float)); |
| result.distances[iter] = d; |
| |
| } |
| |
| if (std::string(node->name) == "tmp_square") { |
| result.calculated_square = node; |
| } |
| } |
| } |
| return res; |
| } |
|
|
| static void power_iteration( |
| const struct pca_params & params, |
| struct ggml_tensor * input, |
| struct ggml_tensor * output) { |
| |
| struct pca_model model(input); |
|
|
| ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(model.backend)); |
| struct pca_result result; |
| struct ggml_tensor * last_eigenvector = NULL; |
|
|
| int n_iters = params.n_iterations / params.n_batch; |
| for (int iter = 0; iter < n_iters; ++iter) { |
| bool calc_square = (iter == 0); |
| struct ggml_cgraph * gf = build_graph_piter(params, model, calc_square); |
| |
| compute_piter(params, model, gf, allocr, result); |
|
|
| for (size_t k = 0; k < result.distances.size(); ++k) { |
| last_eigenvector = result.eigenvectors[k]; |
| if (result.distances[k] < params.tolerance) { |
| break; |
| } |
| } |
|
|
| if (calc_square) { |
| |
| GGML_ASSERT(result.calculated_square != NULL); |
| ggml_backend_tensor_copy(result.calculated_square, model.dev_square); |
| } |
|
|
| { |
| |
| GGML_ASSERT(last_eigenvector != NULL); |
| ggml_backend_tensor_copy(last_eigenvector, model.dev_eigenvector); |
| } |
|
|
| printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n", |
| __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch); |
| } |
|
|
| |
| GGML_ASSERT(last_eigenvector); |
| ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector)); |
| |
| ggml_gallocr_free(allocr); |
|
|
| |
| |
| } |
|
|
| static void run_pca( |
| struct pca_params & params, |
| const std::vector<struct ggml_tensor *> & v_input, |
| const std::vector<struct ggml_tensor *> & v_output) { |
| printf("%s: Running PCA...\n", __func__); |
| for (size_t il = 0; il < v_input.size(); ++il) { |
|
|
| |
| struct ggml_tensor * ctrl_out = v_output[il]; |
| ggml_format_name(ctrl_out, "direction.%ld", il+1); |
|
|
| |
| params.i_layer = il; |
| params.n_layers = v_input.size(); |
| power_iteration(params, v_input[il], ctrl_out); |
| printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size()); |
| } |
| } |
|
|
| } |
|
|