Spaces:

FINAL-Bench
/

LiteRT-LM

Running

App Files Files Community

LiteRT-LM / runtime /executor /executor_settings_base.h

SeaWolf-AI

Upload full LiteRT-LM codebase

5f923cd verified 3 days ago

raw

history blame contribute delete

9.93 kB

	// Copyright 2025 The ODML Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#ifndef THIRD_PARTY_ODML_LITE_RT_LLM_EXECUTOR_EXECUTOR_SETTINGS_BASE_H_
	#define THIRD_PARTY_ODML_LITE_RT_LLM_EXECUTOR_EXECUTOR_SETTINGS_BASE_H_

	#include <iostream>
	#include <memory>
	#include <optional>
	#include <string>
	#include <utility>

	#include "absl/status/status.h" // from @com_google_absl
	#include "absl/status/statusor.h" // from @com_google_absl
	#include "absl/strings/string_view.h" // from @com_google_absl
	#include "runtime/util/memory_mapped_file.h"
	#include "runtime/util/scoped_file.h"

	namespace litert::lm {

	enum class Backend {
	// Unspecified backend.
	UNSPECIFIED,

	// CPU hand-written path backend.
	CPU_ARTISAN,

	// GPU hand-written path backend.
	GPU_ARTISAN,

	// CPU LiteRT backend.
	CPU,

	// GPU LiteRT backend.
	GPU,

	// Google Tensor Emission Graph backend.
	GOOGLE_TENSOR_ARTISAN,

	// NPU backend.
	NPU,
	};
	std::ostream& operator<<(std::ostream& os, const Backend& backend);
	// Returns the backend enum from the string. Case-insensitive.
	absl::StatusOr<Backend> GetBackendFromString(absl::string_view backend_str);
	// Returns the string representation of the backend enum.
	std::string GetBackendString(Backend backend);

	enum class ActivationDataType {
	// Use float32 as the activation data type.
	FLOAT32,

	// Use float16 as the activation data type.
	FLOAT16,

	// Use int16 as the activation data type.
	INT16,

	// Use int8 as the activation data type.
	INT8,
	};
	std::ostream& operator<<(std::ostream& os,
	const ActivationDataType& activation);

	absl::StatusOr<ActivationDataType> GetActivationDataTypeFromString(
	const std::string& activation_data_type);

	// Fake weights mode.
	enum class FakeWeightsMode {
	// Don't use fake weights, read real weights from disk.
	FAKE_WEIGHTS_NONE,

	// Replace all weights with INT8 fakes.
	FAKE_WEIGHTS_8BITS_ALL_LAYERS,

	// Replace feedforward and embedding weights with INT4 fakes and replace
	// attention weights with INT8 fakes.
	FAKE_WEIGHTS_ATTN_8_FFN_4_EMB_4,
	};
	std::ostream& operator<<(std::ostream& os,
	const FakeWeightsMode& fake_weights_mode);

	enum class FileFormat {
	// .tflite file format.
	TFLITE,

	// .task file format.
	TASK,

	// .litert_lm file format.
	LITERT_LM,
	};
	std::ostream& operator<<(std::ostream& os, const FileFormat& file_format);

	// Class to host the model assets, including base models and lora models.
	class ModelAssets {
	public:
	static absl::StatusOr<ModelAssets> Create(
	std::shared_ptr<ScopedFile> model_file);
	static absl::StatusOr<ModelAssets> Create(absl::string_view model_path);
	static absl::StatusOr<ModelAssets> Create(
	std::shared_ptr<MemoryMappedFile> model_file);
	static absl::StatusOr<ModelAssets> Create(
	std::shared_ptr<MemoryMappedFile> model_file,
	absl::string_view model_path);

	// Convenience factory function to create a ModelAssets with both a model
	// path and file. Will use the scoped file if both are provided.
	static absl::StatusOr<ModelAssets> Create(
	std::shared_ptr<ScopedFile> model_file, absl::string_view model_path);

	bool HasScopedFile() const { return scoped_file_ != nullptr; }
	bool HasMemoryMappedFile() const { return memory_mapped_file_ != nullptr; }

	// Returns the model file if it was created with the respective variant,
	// otherwise returns an error.
	absl::StatusOr<absl::string_view> GetPath() const;
	absl::StatusOr<std::shared_ptr<ScopedFile>> GetScopedFile() const;
	absl::StatusOr<std::shared_ptr<MemoryMappedFile>> GetMemoryMappedFile() const;

	// Convenience method to get a read-only scoped file to the model file
	// regardless of whether this instance was created from a path or scoped file.
	absl::StatusOr<std::shared_ptr<ScopedFile>> GetOrCreateScopedFile() const;

	FakeWeightsMode fake_weights_mode() const { return fake_weights_mode_; }

	void SetFakeWeightsMode(FakeWeightsMode fake_weights_mode) {
	fake_weights_mode_ = fake_weights_mode;
	}

	private:
	explicit ModelAssets(std::shared_ptr<ScopedFile> model_file,
	absl::string_view model_path);
	explicit ModelAssets(absl::string_view model_path);
	explicit ModelAssets(std::shared_ptr<MemoryMappedFile> model_file);
	explicit ModelAssets(std::shared_ptr<MemoryMappedFile> model_file,
	absl::string_view model_path);

	// TODO: b/417814685 - Consider supporting multiple model files if the need
	// case arises.
	std::string path_;
	std::shared_ptr<ScopedFile> scoped_file_;
	std::shared_ptr<MemoryMappedFile> memory_mapped_file_;

	FakeWeightsMode fake_weights_mode_ = FakeWeightsMode::FAKE_WEIGHTS_NONE;
	};
	std::ostream& operator<<(std::ostream& os, const ModelAssets& model_assets);

	// Base Settings for the executor modules.
	class ExecutorSettingsBase {
	public:
	virtual ~ExecutorSettingsBase() = default;

	// Getter APIs.
	const ModelAssets& GetModelAssets() const { return model_assets_; }

	// Backend APIs.
	const Backend& GetBackend() const { return backend_; }
	virtual absl::Status SetBackend(const Backend& backend) {
	backend_ = backend;
	return absl::OkStatus();
	}

	// Activation data type APIs.
	const std::optional<ActivationDataType>& GetActivationDataType() const {
	return activation_data_type_;
	}
	void SetActivationDataType(const ActivationDataType& activation_data_type) {
	activation_data_type_ = activation_data_type;
	}

	// Should be used by consumers who want to write to a single weight cache
	// file. Returns, in order of preference:
	// 1. an open file descriptor to the weight cache file,
	// 2. the file path of the weight cache file, based on the given cache
	// directory and/or model path. Will append `suffix`.
	// 3. an error if a weight cache file could not be determined.
	absl::StatusOr<
	std::variant<std::string, std::shared_ptr<litert::lm::ScopedFile>>>
	GetWeightCacheFile(absl::string_view suffix = ".cache") const;
	// Prefer to use `GetWeightCacheFile()` if possible.
	const std::string& GetCacheDir() const { return cache_dir_; }
	// Prefer to use `GetWeightCacheFile()` if possible.
	std::shared_ptr<litert::lm::ScopedFile> GetScopedCacheFile() const {
	return scoped_cache_file_;
	}
	const std::string& GetLitertDispatchLibDir() const {
	return litert_dispatch_lib_dir_;
	}

	// Should be used by consumers who want to write to a single program cache
	// file. Returns, in order of preference:
	// 1. an open file descriptor to the program cache file,
	// 2. the file path of the program cache file, based on the given cache
	// directory and/or model path. Will append `suffix`.
	// 3. an error if a program cache file could not be determined.
	absl::StatusOr<
	std::variant<std::string, std::shared_ptr<litert::lm::ScopedFile>>>
	GetProgramCacheFile(absl::string_view suffix = ".program_cache") const;
	// Prefer to use `GetProgramCacheFile()` if possible.
	std::shared_ptr<litert::lm::ScopedFile> GetScopedProgramCacheFile() const {
	return scoped_program_cache_file_;
	}

	// Setter APIs.
	void SetCacheDir(const std::string& cache_dir) { cache_dir_ = cache_dir; }
	void SetScopedCacheFile(std::shared_ptr<litert::lm::ScopedFile> cache_file) {
	scoped_cache_file_ = std::move(cache_file);
	}
	void SetLitertDispatchLibDir(const std::string& litert_dispatch_lib_dir) {
	litert_dispatch_lib_dir_ = litert_dispatch_lib_dir;
	}

	void SetScopedProgramCacheFile(
	std::shared_ptr<litert::lm::ScopedFile> cache_file) {
	scoped_program_cache_file_ = std::move(cache_file);
	}

	protected:
	explicit ExecutorSettingsBase(ModelAssets model_assets)
	: model_assets_(std::move(model_assets)) {}
	// Optional setting to use LLM executor backend.
	Backend backend_ = Backend::CPU;

	private:
	// Path to the LiteRT model file.
	ModelAssets model_assets_;

	// Directory for saving the weight cache file. If this is set and the
	// backend supports it, the re-arranged weights will be stored in the
	// directory after the 1st initialization, making the future initialization
	// to be much faster.
	//
	// Consumers should prefer to use the `cache_file_` if set.
	std::string cache_dir_;

	// Open file for writing the weight cache to and later loading cache from.
	// If set, this should be preferred over the `cache_dir_`.
	std::shared_ptr<litert::lm::ScopedFile> scoped_cache_file_;

	// Open file for writing the program cache to and later loading cache from.
	// If set, this should be preferred over the `cache_dir_`.
	std::shared_ptr<litert::lm::ScopedFile> scoped_program_cache_file_;

	// Optional setting for specific activation data type. If not set, the
	// default activation data type for each OS & backend will be used. Setting
	// this field will override the default activation data type, for example,
	// OpenCL backend only support fp32 on Linux.
	std::optional<ActivationDataType> activation_data_type_;

	// Optional LoRA model assets.
	std::optional<ModelAssets> lora_model_assets_;

	// LiteRT dispatch library directory. If not set, the runtime will look for
	// the library in the path defined as the environment variables.
	std::string litert_dispatch_lib_dir_;
	};

	} // namespace litert::lm

	#endif // THIRD_PARTY_ODML_LITE_RT_LLM_EXECUTOR_LLM_EXECUTOR_SETTINGS_H_