Spaces:

FINAL-Bench
/

LiteRT-LM

Running

App Files Files Community

LiteRT-LM / runtime /core /engine_impl.cc

SeaWolf-AI

Upload full LiteRT-LM codebase

5f923cd verified 3 days ago

raw

history blame contribute delete

17.8 kB

	// Copyright 2025 The ODML Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	// TODO(b/417209286): Remove this once the model assets are stored in the
	// litertlm file format.
	#include <filesystem> // NOLINT: Required for path manipulation.
	#include <future> // NOLINT(build/c++11)
	#include <memory>
	#include <optional>
	#include <string>
	#include <utility>
	#include <vector>

	#include "absl/base/no_destructor.h" // from @com_google_absl
	#include "absl/log/absl_check.h" // from @com_google_absl
	#include "absl/log/absl_log.h" // from @com_google_absl
	#include "absl/log/check.h" // from @com_google_absl
	#include "absl/status/status.h" // from @com_google_absl
	#include "absl/status/statusor.h" // from @com_google_absl
	#include "absl/strings/string_view.h" // from @com_google_absl
	#include "absl/time/clock.h" // from @com_google_absl
	#include "absl/time/time.h" // from @com_google_absl
	#include "litert/cc/litert_environment.h" // from @litert
	#include "litert/cc/litert_macros.h" // from @litert
	#include "runtime/components/model_resources.h"
	#include "runtime/components/tokenizer.h"
	#include "runtime/core/session_factory.h"
	#include "runtime/engine/engine.h"
	#include "runtime/engine/engine_factory.h"
	#include "runtime/engine/engine_settings.h"
	#include "runtime/engine/io_types.h"
	#include "runtime/executor/audio_executor.h"
	#include "runtime/executor/audio_executor_settings.h"
	#include "runtime/executor/audio_litert_compiled_model_executor.h"
	#include "runtime/executor/executor_settings_base.h"
	#include "runtime/executor/litert_compiled_model_executor_utils.h"
	#include "runtime/executor/llm_executor.h"
	#include "runtime/executor/llm_executor_settings.h"
	#include "runtime/executor/llm_litert_compiled_model_executor_factory.h"
	#include "runtime/executor/magic_number_configs_helper.h"
	#include "runtime/executor/vision_executor.h"
	#include "runtime/executor/vision_litert_compiled_model_executor.h"
	#include "runtime/framework/threadpool.h"
	#include "runtime/proto/llm_metadata.pb.h"
	#include "runtime/proto/sampler_params.pb.h"
	#include "runtime/util/status_macros.h" // NOLINT

	namespace litert::lm {
	namespace {

	// Returns the process-wide LiteRT Environment, creating it on the first call
	// from the provided settings. This ensures we keep the same LiteRT environment
	// for the whole application lifetime, which is required for the GPU LiteRT
	// environment. See b/454383477 for more details.
	//
	// The environment is a function-local static, so only the arguments of the
	// first call are used to build it; later calls return the same object (or the
	// same cached error status if the first creation attempt failed).
	//
	// Args:
	//   engine_settings: Source of the main executor settings (backend, advanced
	//     settings, dispatch library directory, model path).
	//   model_resources: Passed to MagicNumberConfigsHelper when magic numbers
	//     are configured for the CPU/GPU backends.
	// Returns:
	//   A reference to the shared Environment, or the error produced while
	//   building it.
	absl::StatusOr<Environment&> GetEnvironment(EngineSettings& engine_settings,
	                                            ModelResources& model_resources) {
	  // Helper must be available until LlmLiteRtCompiledModelExecutor::Create() is
	  // called. Since env is used multiple times, it should also be static.
	  static absl::NoDestructor<MagicNumberConfigsHelper> helper;
	  static absl::NoDestructor<absl::StatusOr<Environment>> kEnvironment(
	      [&]() -> absl::StatusOr<Environment> {
	        std::vector<Environment::Option> env_options;
	        const auto& main_executor_settings =
	            engine_settings.GetMainExecutorSettings();

	        if ((main_executor_settings.GetBackend() == Backend::CPU) ||
	            (main_executor_settings.GetBackend() == Backend::GPU)) {
	          // Magic numbers are configured unless advanced settings are present
	          // and explicitly turn them off.
	          if (!main_executor_settings
	                   .GetAdvancedSettings() ||  // Default is true.
	              main_executor_settings.GetAdvancedSettings()
	                  ->configure_magic_numbers) {
	            env_options = helper->GetLiteRtEnvOptions(model_resources,
	                                                      main_executor_settings);
	          }
	        } else {
	#if defined(LITERT_DISABLE_NPU)
	          return absl::InvalidArgumentError(
	              "Only CPU and GPU backends are supported.");
	#else
	          if (!main_executor_settings.GetLitertDispatchLibDir().empty()) {
	            // If the dispatch library directory is provided, use it.
	            env_options.push_back(::litert::Environment::Option{
	                ::litert::Environment::OptionTag::DispatchLibraryDir,
	                main_executor_settings.GetLitertDispatchLibDir()});
	            ABSL_LOG(INFO) << "Setting dispatch library path from "
	                              "main_executor_settings: "
	                           << main_executor_settings.GetLitertDispatchLibDir();
	          } else {
	            // Otherwise, use the directory of the model file.
	            std::string model_path(
	                main_executor_settings.GetModelAssets().GetPath().value_or(""));
	            std::filesystem::path path(model_path);
	            // Note: Existence check for path was here, but it's better to check
	            // before calling this function if needed.
	            // The option below is built from a string_view into this string;
	            // presumably it is static so the viewed bytes stay valid for as
	            // long as the environment uses them.
	            static const absl::NoDestructor<std::string> kDispatchLibraryPath(
	                path.parent_path().string());
	            if (!kDispatchLibraryPath->empty()) {
	              ABSL_LOG(INFO)
	                  << "Setting dispatch library path: " << *kDispatchLibraryPath;
	              env_options.push_back(::litert::Environment::Option{
	                  ::litert::Environment::OptionTag::DispatchLibraryDir,
	                  absl::string_view(*kDispatchLibraryPath)});
	            } else {
	              ABSL_LOG(INFO) << "No dispatch library path provided.";
	            }
	          }
	#endif  // defined(LITERT_DISABLE_NPU)
	        }
	        LITERT_ASSIGN_OR_RETURN(auto env, Environment::Create(env_options));
	        return std::move(env);
	      }());
	  if (!kEnvironment->ok()) {
	    return kEnvironment->status();
	  }
	  return **kEnvironment;
	}

	// Engine implementation that owns the model resources, the tokenizer, the
	// LLM/vision/audio executors and a worker thread pool, and hands raw pointers
	// to them to every session it creates.
	class EngineImpl : public Engine {
	 public:
	  // Waits for the worker thread pool to finish (up to
	  // Engine::kDefaultTimeout). ABSL_QCHECK_OK terminates the process if the
	  // wait returns a non-OK status.
	  ~EngineImpl() override {
	    ABSL_QCHECK_OK(WaitUntilDone(Engine::kDefaultTimeout));
	  }

	  // Builds a fully initialized engine from `engine_settings`.
	  // `input_prompt_as_hint` is forwarded to
	  // EngineSettings::MaybeUpdateAndValidate().
	  static absl::StatusOr<std::unique_ptr<Engine>> Create(
	      EngineSettings engine_settings, absl::string_view input_prompt_as_hint);

	  // Takes ownership of all components. `vision_executor` and `audio_executor`
	  // may be null when the corresponding modality is not enabled.
	  EngineImpl(EngineSettings engine_settings,
	             std::unique_ptr<ModelResources> litert_model_resources,
	             std::unique_ptr<Tokenizer> tokenizer,
	             std::unique_ptr<LlmExecutor> executor,
	             std::unique_ptr<VisionExecutor> vision_executor,
	             std::unique_ptr<AudioExecutor> audio_executor,
	             std::optional<BenchmarkInfo> benchmark_info,
	             std::unique_ptr<ThreadPool> worker_thread_pool)
	      : engine_settings_(std::move(engine_settings)),
	        litert_model_resources_(std::move(litert_model_resources)),
	        tokenizer_(std::move(tokenizer)),
	        executor_(std::move(executor)),
	        vision_executor_(std::move(vision_executor)),
	        audio_executor_(std::move(audio_executor)),
	        stop_token_ids_(),
	        sampler_params_(),
	        benchmark_info_(std::move(benchmark_info)),
	        worker_thread_pool_(std::move(worker_thread_pool)) {}

	  // Creates a session that shares this engine's executors, tokenizer and
	  // worker thread pool. `session_config` is copied and then updated/validated
	  // against the engine settings before use. Returns the first error reported
	  // by validation, session initialization or benchmark bookkeeping.
	  absl::StatusOr<std::unique_ptr<Session>> CreateSession(
	      const SessionConfig& session_config) override {
	    std::optional<BenchmarkInfo> session_benchmark_info;
	    if (benchmark_info_.has_value()) {
	      // Each session will have its own benchmark info, which will be populated
	      // with the session-specific information.
	      session_benchmark_info = benchmark_info_;
	      RETURN_IF_ERROR(session_benchmark_info->TimeInitPhaseStart(
	          BenchmarkInfo::InitPhase::kSession));
	    }

	    SessionConfig config = session_config;
	    // TODO(b/418794726): Move this logics to be part of the SessionConfig
	    // class.
	    RETURN_IF_ERROR(config.MaybeUpdateAndValidate(engine_settings_));

	    ABSL_CHECK(litert_model_resources_ != nullptr);
	    ASSIGN_OR_RETURN(
	        auto session,
	        InitializeSessionBasic(executor_.get(), tokenizer_.get(),
	                               /*vision_executor=*/vision_executor_.get(),
	                               /*audio_executor=*/audio_executor_.get(), config,
	                               std::move(session_benchmark_info),
	                               worker_thread_pool_.get()));
	    if (benchmark_info_.has_value()) {
	      // The benchmark info was moved into the session, so the init phase is
	      // closed on the session's own copy. A session that cannot provide one
	      // is not treated as an error.
	      auto session_benchmark_info_or = session->GetMutableBenchmarkInfo();
	      if (session_benchmark_info_or.ok()) {
	        RETURN_IF_ERROR(session_benchmark_info_or.value()->TimeInitPhaseEnd(
	            BenchmarkInfo::InitPhase::kSession));
	      }
	    }
	    return session;
	  }

	  // Forwards to ThreadPool::WaitUntilDone() on the engine's worker pool.
	  absl::Status WaitUntilDone(absl::Duration timeout) override {
	    return worker_thread_pool_->WaitUntilDone(timeout);
	  }

	  const EngineSettings& GetEngineSettings() const override {
	    return engine_settings_;
	  }

	  const Tokenizer& GetTokenizer() const override { return *tokenizer_; }

	  // Returns FailedPrecondition when the engine has no audio executor.
	  absl::StatusOr<AudioExecutorProperties> GetAudioExecutorProperties()
	      const override {
	    if (audio_executor_ == nullptr) {
	      return absl::FailedPreconditionError("Audio modality is not enabled.");
	    }
	    return audio_executor_->GetAudioExecutorProperties();
	  }

	  // Returns FailedPrecondition when the engine has no vision executor.
	  absl::StatusOr<VisionExecutorProperties> GetVisionExecutorProperties()
	      const override {
	    if (vision_executor_ == nullptr) {
	      return absl::FailedPreconditionError("Vision modality is not enabled.");
	    }
	    return vision_executor_->GetVisionExecutorProperties();
	  }

	 private:
	  // Members are destroyed in reverse declaration order: the thread pool goes
	  // first and the model resources after the executors.

	  // Stored engine settings.
	  EngineSettings engine_settings_;
	  // Model resources, which must outlive `executor_`.
	  std::unique_ptr<ModelResources> litert_model_resources_;
	  // Tokenizer shared by all sessions.
	  std::unique_ptr<Tokenizer> tokenizer_;
	  // Shared executor for all sessions.
	  std::unique_ptr<LlmExecutor> executor_;
	  // Shared vision executor for all sessions.
	  std::unique_ptr<VisionExecutor> vision_executor_;
	  // Shared audio executor for all sessions.
	  std::unique_ptr<AudioExecutor> audio_executor_;
	  // Default stop token ids for all sessions loaded from the model file.
	  std::vector<std::vector<int>> stop_token_ids_;
	  proto::SamplerParameters sampler_params_;

	  // Benchmark info for the engine.
	  std::optional<BenchmarkInfo> benchmark_info_;

	  // Thread pool for the engine to execute the works.
	  std::unique_ptr<ThreadPool> worker_thread_pool_;
	};

	// Creates an Engine from `engine_settings`.
	//
	// Steps, in order:
	//   1. Load the model resources and the LLM metadata.
	//   2. Load the tokenizer: synchronously for legacy model files without an
	//      LlmModelType, otherwise through a std::future so it can overlap with
	//      executor creation.
	//   3. Update/validate the engine settings from the model file.
	//   4. Create the main executor and the optional vision/audio executors on
	//      the shared LiteRT environment.
	//   5. When the tokenizer was loaded through the future, re-validate the
	//      settings with it and push the result to the main executor.
	//
	// When benchmarking is enabled, each init phase is timed in a BenchmarkInfo
	// that is handed to the engine.
	//
	// Args:
	//   engine_settings: Settings to build the engine from; updated in place and
	//     then moved into the engine.
	//   input_prompt_as_hint: Forwarded to
	//     EngineSettings::MaybeUpdateAndValidate().
	// Returns:
	//   The new engine, or the first error hit during initialization.
	absl::StatusOr<std::unique_ptr<Engine>> EngineImpl::Create(
	    EngineSettings engine_settings, absl::string_view input_prompt_as_hint) {
	  std::optional<BenchmarkInfo> benchmark_info =
	      engine_settings.IsBenchmarkEnabled()
	          ? std::make_optional<BenchmarkInfo>(
	                engine_settings.GetBenchmarkParams().value())
	          : std::nullopt;

	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(
	        benchmark_info->TimeInitPhaseStart(BenchmarkInfo::InitPhase::kTotal));
	    RETURN_IF_ERROR(benchmark_info->TimeInitPhaseStart(
	        BenchmarkInfo::InitPhase::kModelAssets));
	  }
	  const auto& model_assets =
	      engine_settings.GetMutableMainExecutorSettings().GetModelAssets();
	  ASSIGN_OR_RETURN(auto model_resources,
	                   BuildLiteRtCompiledModelResources(model_assets));
	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(benchmark_info->TimeInitPhaseEnd(
	        BenchmarkInfo::InitPhase::kModelAssets));
	  }

	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(benchmark_info->TimeInitPhaseStart(
	        BenchmarkInfo::InitPhase::kLlmMetadata));
	  }
	  ASSIGN_OR_RETURN(auto* llm_metadata, model_resources->GetLlmMetadata());
	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(benchmark_info->TimeInitPhaseEnd(
	        BenchmarkInfo::InitPhase::kLlmMetadata));
	  }
	  const bool has_llm_model_type = llm_metadata->has_llm_model_type();
	  absl::Duration tokenizer_duration = absl::ZeroDuration();
	  // This lambda is used to create the tokenizer asynchronously if the model
	  // type is available, such that the tokenizer can be created in parallel with
	  // the executor. It records how long the load took in `tokenizer_duration`.
	  auto create_tokenizer =
	      [&tokenizer_duration,
	       &model_resources]() -> absl::StatusOr<std::unique_ptr<Tokenizer>> {
	    absl::Time start_time = absl::Now();
	    ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
	                     model_resources->GetTokenizer());
	    tokenizer_duration = absl::Now() - start_time;
	    return tokenizer;
	  };

	  // Updates and validates `engine_settings` from the model file. Pass nullptr
	  // while the tokenizer is not loaded yet; the settings are validated again
	  // once it is.
	  auto update_engine_settings = [&](Tokenizer* loaded_tokenizer) {
	    return engine_settings.MaybeUpdateAndValidate(
	        loaded_tokenizer, llm_metadata, input_prompt_as_hint,
	        model_resources->GetTFLiteModelBackendConstraint(
	            ModelType::kTfLitePrefillDecode),
	        model_resources->GetTFLiteModelBackendConstraint(
	            ModelType::kTfLiteVisionEncoder),
	        model_resources->GetTFLiteModelBackendConstraint(
	            ModelType::kTfLiteAudioEncoderHw));
	  };

	  const auto& main_executor_settings =
	      engine_settings.GetMainExecutorSettings();

	  // Declared after everything `create_tokenizer` captures by reference: on an
	  // early return this future is destroyed first, and the destructor of a
	  // future obtained from std::async(std::launch::async, ...) waits for the
	  // task, so the task never outlives the objects it references.
	  std::future<absl::StatusOr<std::unique_ptr<Tokenizer>>> tokenizer_future;
	  std::unique_ptr<Tokenizer> tokenizer;
	  if (!has_llm_model_type) {
	    ABSL_LOG(INFO)
	        << "Legacy model files don't have LlmModelType, loading tokenizer now";
	    ASSIGN_OR_RETURN(tokenizer, create_tokenizer());
	    // Update and load the parameters from the model file and convert the
	    // tokens to ids.
	    RETURN_IF_ERROR(update_engine_settings(tokenizer.get()));
	  } else {
	    // If the model type is available, wait for the tokenizer to be created
	    // after the model is loaded.
	    ABSL_LOG(INFO) << "New model files have LlmModelType, loading tokenizer "
	                      "asynchronously";

	    // With a deferred launch the tokenizer is only loaded when get() is
	    // called below, i.e. after the executors have been created.
	    const std::launch launch_policy =
	        engine_settings.GetParallelFileSectionLoading()
	            ? std::launch::async
	            : std::launch::deferred;
	    tokenizer_future = std::async(launch_policy, create_tokenizer);

	    RETURN_IF_ERROR(update_engine_settings(nullptr));
	  }

	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(benchmark_info->TimeInitPhaseStart(
	        BenchmarkInfo::InitPhase::kExecutor));
	  }
	  std::unique_ptr<LlmExecutor> executor;
	  ASSIGN_OR_RETURN(auto& env,
	                   GetEnvironment(engine_settings, *model_resources));

	  // Every backend currently goes through the compiled-model executor.
	  ASSIGN_OR_RETURN(executor,
	                   CreateLlmLiteRtCompiledModelExecutor(
	                       main_executor_settings, env, *model_resources));

	  // TODO - b/436674053: Modularize the executor creation logic into a
	  // separate executor class, and have unit test for it.
	  std::unique_ptr<VisionExecutor> vision_executor;
	  if (engine_settings.GetVisionExecutorSettings().has_value()) {
	    ASSIGN_OR_RETURN(
	        vision_executor,
	        VisionLiteRtCompiledModelExecutor::Create(
	            engine_settings.GetMutableVisionExecutorSettings().value(), env));
	  }

	  std::unique_ptr<AudioExecutor> audio_executor;
	  if (engine_settings.GetAudioExecutorSettings().has_value()) {
	    ASSIGN_OR_RETURN(
	        audio_executor,
	        AudioLiteRtCompiledModelExecutor::Create(
	            engine_settings.GetAudioExecutorSettings().value(), env));
	  }

	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(
	        benchmark_info->TimeInitPhaseEnd(BenchmarkInfo::InitPhase::kExecutor));
	  }

	  if (has_llm_model_type) {
	    // Now load the tokenizer and update the engine settings.
	    ASSIGN_OR_RETURN(tokenizer, tokenizer_future.get());
	    RETURN_IF_ERROR(update_engine_settings(tokenizer.get()));
	    // As we load the tokenizer asynchronously, we need to update the executor
	    // settings after the tokenizer is loaded.
	    RETURN_IF_ERROR(executor->UpdateExecutorSettings(
	        engine_settings.GetMainExecutorSettings()));
	  }
	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(benchmark_info->InitPhaseRecord(
	        BenchmarkInfo::InitPhase::kTokenizer, tokenizer_duration));
	  }

	  // Creating the thread pool of a single thread to execute the works.
	  auto worker_thread_pool =
	      std::make_unique<ThreadPool>(/*name_prefix=*/"engine",
	                                   /*max_num_threads=*/1);

	  if (benchmark_info.has_value()) {
	    RETURN_IF_ERROR(
	        benchmark_info->TimeInitPhaseEnd(BenchmarkInfo::InitPhase::kTotal));
	  }

	  auto llm_impl = std::make_unique<EngineImpl>(
	      std::move(engine_settings), std::move(model_resources),
	      std::move(tokenizer), std::move(executor), std::move(vision_executor),
	      std::move(audio_executor), std::move(benchmark_info),
	      std::move(worker_thread_pool));

	  return llm_impl;
	}

	// Registers EngineImpl::Create as the creator for the kLiteRTCompiledModel
	// engine type.
	LITERT_LM_REGISTER_ENGINE(
	    EngineFactory::EngineType::kLiteRTCompiledModel,
	    [](EngineSettings engine_settings, absl::string_view prompt_hint)
	        -> absl::StatusOr<std::unique_ptr<Engine>> {
	      return EngineImpl::Create(std::move(engine_settings), prompt_hint);
	    });
	} // namespace
	} // namespace litert::lm