// Copyright 2025 The ODML Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "runtime/executor/llm_executor_settings.h" #include // NOLINT: Required for path manipulation. #include #include #include #include #include #include #include "absl/status/status.h" // from @com_google_absl #include "absl/strings/str_cat.h" // from @com_google_absl #include "absl/strings/string_view.h" // from @com_google_absl #include "runtime/executor/executor_settings_base.h" #include "runtime/util/scoped_file.h" #include "runtime/util/test_utils.h" // IWYU pragma: keep namespace litert::lm { namespace { #if defined(_WIN32) constexpr absl::string_view kPathToModel1 = "\\path\\to\\model1"; constexpr absl::string_view kPathToModel1Tflite = "\\path\\to\\model1.tflite"; constexpr absl::string_view kPathToModel1TfliteCache = "\\path\\to\\model1.tflite.cache"; constexpr absl::string_view kPathToCache = "\\path\\to\\cache"; constexpr absl::string_view kWeightCachePath = "\\weight\\cache\\path"; constexpr absl::string_view kWeightCachePathWithSeparator = "\\weight\\cache\\path\\"; constexpr absl::string_view kWeightCachePathFile = "\\weight\\cache\\path\\model1.tflite.cache"; constexpr absl::string_view kWeightCachePathXnnpackFile = "\\weight\\cache\\path\\model1.tflite.xnnpack_cache"; constexpr absl::string_view kModel1TfliteCustomSuffix = "\\path\\to\\model1.tflite.custom_suffix"; #else constexpr absl::string_view kPathToModel1 = "/path/to/model1"; constexpr absl::string_view kPathToModel1Tflite = "/path/to/model1.tflite"; constexpr absl::string_view kPathToModel1TfliteCache = "/path/to/model1.tflite.cache"; constexpr absl::string_view kPathToCache = "/path/to/cache"; constexpr absl::string_view kWeightCachePath = "/weight/cache/path"; constexpr absl::string_view kWeightCachePathWithSeparator = "/weight/cache/path/"; constexpr absl::string_view kWeightCachePathFile = "/weight/cache/path/model1.tflite.cache"; constexpr absl::string_view kWeightCachePathXnnpackFile = "/weight/cache/path/model1.tflite.xnnpack_cache"; constexpr absl::string_view kModel1TfliteCustomSuffix = "/path/to/model1.tflite.custom_suffix"; #endif using absl::StatusCode::kInvalidArgument; using ::testing::VariantWith; using ::testing::status::StatusIs; TEST(LlmExecutorConfigTest, Backend) { Backend backend; std::stringstream oss; backend = Backend::CPU_ARTISAN; oss << backend; EXPECT_EQ(oss.str(), "CPU_ARTISAN"); backend = Backend::GPU_ARTISAN; oss.str(""); oss << backend; EXPECT_EQ(oss.str(), "GPU_ARTISAN"); backend = Backend::GPU; oss.str(""); oss << backend; EXPECT_EQ(oss.str(), "GPU"); backend = Backend::CPU; oss.str(""); oss << backend; EXPECT_EQ(oss.str(), "CPU"); backend = Backend::GOOGLE_TENSOR_ARTISAN; oss.str(""); oss << backend; EXPECT_EQ(oss.str(), "GOOGLE_TENSOR_ARTISAN"); backend = Backend::NPU; oss.str(""); oss << backend; EXPECT_EQ(oss.str(), "NPU"); } TEST(LlmExecutorConfigTest, StringToBackend) { ASSERT_OK_AND_ASSIGN(auto backend, GetBackendFromString("cpu_artisan")); EXPECT_EQ(backend, Backend::CPU_ARTISAN); ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("gpu_artisan")); EXPECT_EQ(backend, Backend::GPU_ARTISAN); ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("gpu")); EXPECT_EQ(backend, Backend::GPU); ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("cpu")); EXPECT_EQ(backend, Backend::CPU); ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("google_tensor_artisan")); EXPECT_EQ(backend, Backend::GOOGLE_TENSOR_ARTISAN); ASSERT_OK_AND_ASSIGN(backend, GetBackendFromString("npu")); EXPECT_EQ(backend, Backend::NPU); } TEST(LlmExecutorConfigTest, ActivationDataType) { ActivationDataType act; std::stringstream oss; act = ActivationDataType::FLOAT32; oss << act; EXPECT_EQ(oss.str(), "FLOAT32"); act = ActivationDataType::FLOAT16; oss.str(""); oss << act; EXPECT_EQ(oss.str(), "FLOAT16"); } TEST(LlmExecutorConfigTest, FakeWeightsMode) { FakeWeightsMode fake_weights_mode; std::stringstream oss; fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_NONE; oss << fake_weights_mode; EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_NONE"); fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_8BITS_ALL_LAYERS; oss.str(""); oss << fake_weights_mode; EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_8BITS_ALL_LAYERS"); fake_weights_mode = FakeWeightsMode::FAKE_WEIGHTS_ATTN_8_FFN_4_EMB_4; oss.str(""); oss << fake_weights_mode; EXPECT_EQ(oss.str(), "FAKE_WEIGHTS_ATTN_8_FFN_4_EMB_4"); } TEST(LlmExecutorConfigTest, FileFormat) { std::stringstream oss; oss.str(""); oss << FileFormat::TFLITE; EXPECT_EQ(oss.str(), "TFLITE"); oss.str(""); oss << FileFormat::TASK; EXPECT_EQ(oss.str(), "TASK"); oss.str(""); oss << FileFormat::LITERT_LM; EXPECT_EQ(oss.str(), "LITERT_LM"); } TEST(LlmExecutorConfigTest, ModelAssets) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); std::stringstream oss; oss << *model_assets; const std::string expected_output = absl::StrCat("model_path: ", kPathToModel1, "\nfake_weights_mode: FAKE_WEIGHTS_NONE\n"); EXPECT_EQ(oss.str(), expected_output); } GpuArtisanConfig CreateGpuArtisanConfig() { GpuArtisanConfig config; config.num_output_candidates = 1; config.wait_for_weight_uploads = true; config.num_decode_steps_per_sync = 3; config.sequence_batch_size = 16; config.supported_lora_ranks = {4, 16}; config.max_top_k = 40; config.enable_decode_logits = true; config.use_submodel = true; return config; } TEST(LlmExecutorConfigTest, GpuArtisanConfig) { GpuArtisanConfig config = CreateGpuArtisanConfig(); std::stringstream oss; oss << config; const std::string expected_output = R"(num_output_candidates: 1 wait_for_weight_uploads: 1 num_decode_steps_per_sync: 3 sequence_batch_size: 16 supported_lora_ranks: vector of 2 elements: [4, 16] max_top_k: 40 enable_decode_logits: 1 enable_external_embeddings: 0 use_submodel: 1 )"; EXPECT_EQ(oss.str(), expected_output); } TEST(LlmExecutorConfigTest, LlmExecutorSettings) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN)); settings.SetBackendConfig(CreateGpuArtisanConfig()); settings.SetMaxNumTokens(1024); settings.SetActivationDataType(ActivationDataType::FLOAT16); settings.SetMaxNumImages(1); settings.SetCacheDir(std::string(kPathToCache)); std::stringstream oss; oss << settings; const std::string expected_output = absl::StrCat( R"(backend: GPU_ARTISAN backend_config: num_output_candidates: 1 wait_for_weight_uploads: 1 num_decode_steps_per_sync: 3 sequence_batch_size: 16 supported_lora_ranks: vector of 2 elements: [4, 16] max_top_k: 40 enable_decode_logits: 1 enable_external_embeddings: 0 use_submodel: 1 max_tokens: 1024 activation_data_type: FLOAT16 max_num_images: 1 lora_rank: 0 cache_dir: )", kPathToCache, R"( cache_file: Not set litert_dispatch_lib_dir: Not set model_assets: model_path: )", kPathToModel1, R"( fake_weights_mode: FAKE_WEIGHTS_NONE advanced_settings: Not set )"); EXPECT_EQ(oss.str(), expected_output); } TEST(LlmExecutorConfigTest, LlmExecutorSettingsWithAdvancedSettings) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN)); settings.SetBackendConfig(CreateGpuArtisanConfig()); settings.SetMaxNumTokens(1024); settings.SetActivationDataType(ActivationDataType::FLOAT16); settings.SetMaxNumImages(1); settings.SetCacheDir(std::string(kPathToCache)); settings.SetAdvancedSettings(AdvancedSettings{ .prefill_batch_sizes = {128, 256}, .num_output_candidates = 3, .configure_magic_numbers = true, .verify_magic_numbers = true, .clear_kv_cache_before_prefill = false, .num_logits_to_print_after_decode = 10, .gpu_madvise_original_shared_tensors = true, .is_benchmark = true, .preferred_device_substr = "nvidia", .num_threads_to_upload = 4, .num_threads_to_compile = 2, .convert_weights_on_gpu = true, .wait_for_weights_conversion_complete_in_benchmark = false, .optimize_shader_compilation = false, .cache_compiled_shaders_only = true, .share_constant_tensors = false, .sampler_handles_input = false, .allow_src_quantized_fc_conv_ops = true, .hint_waiting_for_completion = false, .enable_speculative_decoding = false, .disable_delegate_clustering = false, }); std::stringstream oss; oss << settings; const std::string expected_output = absl::StrCat( R"(backend: GPU_ARTISAN backend_config: num_output_candidates: 1 wait_for_weight_uploads: 1 num_decode_steps_per_sync: 3 sequence_batch_size: 16 supported_lora_ranks: vector of 2 elements: [4, 16] max_top_k: 40 enable_decode_logits: 1 enable_external_embeddings: 0 use_submodel: 1 max_tokens: 1024 activation_data_type: FLOAT16 max_num_images: 1 lora_rank: 0 cache_dir: )", kPathToCache, R"( cache_file: Not set litert_dispatch_lib_dir: Not set model_assets: model_path: )", kPathToModel1, R"( fake_weights_mode: FAKE_WEIGHTS_NONE advanced_settings: prefill_batch_sizes: [128, 256] num_output_candidates: 3 configure_magic_numbers: 1 verify_magic_numbers: 1 clear_kv_cache_before_prefill: 0 num_logits_to_print_after_decode: 10 gpu_madvise_original_shared_tensors: 1 is_benchmark: 1 preferred_device_substr: nvidia num_threads_to_upload: 4 num_threads_to_compile: 2 convert_weights_on_gpu: 1 wait_for_weights_conversion_complete_in_benchmark: 0 optimize_shader_compilation: 0 cache_compiled_shaders_only: 1 share_constant_tensors: 0 sampler_handles_input: 0 allow_src_quantized_fc_conv_ops: 1 hint_waiting_for_completion: 0 gpu_context_low_priority: Not set enable_speculative_decoding: 0 disable_delegate_clustering: 0 )"); EXPECT_EQ(oss.str(), expected_output); } TEST(LlmExecutorConfigTest, AdvancedSettingsWithGpuContextLowPriority) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN)); settings.SetAdvancedSettings(AdvancedSettings{ .gpu_context_low_priority = true, }); std::stringstream oss; oss << settings; EXPECT_THAT(oss.str(), ::testing::HasSubstr("gpu_context_low_priority: 1")); settings.SetAdvancedSettings(AdvancedSettings{ .gpu_context_low_priority = false, }); oss.str(""); oss << settings; EXPECT_THAT(oss.str(), ::testing::HasSubstr("gpu_context_low_priority: 0")); } TEST(GetWeightCacheFileTest, CacheDirAndModelPath) { auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetCacheDir(std::string(kWeightCachePath)); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); EXPECT_THAT(weight_cache_file, VariantWith(std::string(kWeightCachePathFile))); } TEST(GetWeightCacheFileTest, CacheDirHasTrailingSeparator) { auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetCacheDir(std::string(kWeightCachePathWithSeparator)); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); EXPECT_THAT(weight_cache_file, VariantWith(std::string(kWeightCachePathFile))); } TEST(GetWeightCacheFileTest, CacheDirAndModelPathAndCustomSuffix) { auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetCacheDir(std::string(kWeightCachePath)); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile(".xnnpack_cache")); EXPECT_THAT(weight_cache_file, VariantWith( std::string(kWeightCachePathXnnpackFile))); } TEST(LlmExecutorConfigTest, ModelPathOnly) { auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); EXPECT_THAT(weight_cache_file, VariantWith(std::string(kPathToModel1TfliteCache))); } TEST(GetWeightCacheFileTest, ModelPathAndSuffix) { auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile(".custom_suffix")); EXPECT_THAT(weight_cache_file, VariantWith(std::string(kModel1TfliteCustomSuffix))); } TEST(GetWeightCacheFileTest, PreferScopedCacheFileToCacheDir) { const auto cache_path = std::filesystem::path(::testing::SrcDir()) / "litert_lm/runtime/testdata/test_lm.cache"; ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); auto shared_cache_file = std::make_shared(std::move(cache_file)); auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetScopedCacheFile(shared_cache_file); settings.SetCacheDir(std::string(kWeightCachePath)); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); EXPECT_THAT(weight_cache_file, VariantWith>(shared_cache_file)); } TEST(GetWeightCacheFileTest, PreferScopedCacheFileToScopedModelFile) { const auto model_path = std::filesystem::path(::testing::SrcDir()) / "litert_lm/runtime/testdata/test_lm.litertlm"; const auto cache_path = std::filesystem::path(::testing::SrcDir()) / "litert_lm/runtime/testdata/test_lm.cache"; ASSERT_OK_AND_ASSIGN(auto model_file, ScopedFile::Open(model_path.string())); ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); auto shared_cache_file = std::make_shared(std::move(cache_file)); auto model_assets = ModelAssets::Create(std::make_shared(std::move(model_file))); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetScopedCacheFile(shared_cache_file); ASSERT_OK_AND_ASSIGN(auto weight_cache_file, settings.GetWeightCacheFile()); EXPECT_THAT(weight_cache_file, VariantWith>(shared_cache_file)); } TEST(GetWeightCacheFileTest, EmptyModelPath) { auto model_assets = ModelAssets::Create(""); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetCacheDir(std::string(kWeightCachePath)); EXPECT_THAT(settings.GetWeightCacheFile(".xnnpack_cache"), StatusIs(kInvalidArgument)); } TEST(GetWeightCacheFileTest, CacheDisabled) { const auto cache_path = std::filesystem::path(::testing::SrcDir()) / "litert_lm/runtime/testdata/test_lm.cache"; ASSERT_OK_AND_ASSIGN(auto cache_file, ScopedFile::Open(cache_path.string())); auto model_assets = ModelAssets::Create(kPathToModel1Tflite); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets))); settings.SetCacheDir(":nocache"); // This should be ignored in favor of the explicitly disabled cache dir. settings.SetScopedCacheFile( std::make_shared(std::move(cache_file))); EXPECT_THAT(settings.GetWeightCacheFile(), StatusIs(kInvalidArgument)); } TEST(LlmExecutorConfigTest, GetBackendConfig) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN)); settings.SetBackendConfig(CreateGpuArtisanConfig()); ASSERT_OK_AND_ASSIGN(auto gpu_config, settings.GetBackendConfig()); EXPECT_EQ(gpu_config.num_output_candidates, 1); EXPECT_TRUE(gpu_config.use_submodel); // Test setting via MutableBackendConfig ASSERT_OK_AND_ASSIGN(auto mutable_gpu_config, settings.MutableBackendConfig()); mutable_gpu_config.use_submodel = false; settings.SetBackendConfig(mutable_gpu_config); ASSERT_OK_AND_ASSIGN(auto updated_gpu_config, settings.GetBackendConfig()); EXPECT_FALSE(updated_gpu_config.use_submodel); EXPECT_THAT(settings.GetBackendConfig(), StatusIs(kInvalidArgument)); } TEST(LlmExecutorConfigTest, MutableBackendConfig) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN)); settings.SetBackendConfig(CreateGpuArtisanConfig()); ASSERT_OK_AND_ASSIGN(auto gpu_config, settings.MutableBackendConfig()); gpu_config.num_output_candidates = 2; settings.SetBackendConfig(gpu_config); ASSERT_OK_AND_ASSIGN(auto gpu_config_after_change, settings.GetBackendConfig()); EXPECT_EQ(gpu_config_after_change.num_output_candidates, 2); EXPECT_THAT(settings.MutableBackendConfig(), StatusIs(kInvalidArgument)); } TEST(LlmExecutorConfigTest, SetSupportedLoraRanks) { auto model_assets = ModelAssets::Create(kPathToModel1); ASSERT_OK(model_assets); ASSERT_OK_AND_ASSIGN(auto settings, LlmExecutorSettings::CreateDefault( *std::move(model_assets), Backend::GPU_ARTISAN, Backend::GPU)); EXPECT_EQ(settings.GetSamplerBackend(), Backend::GPU); } } // namespace } // namespace litert::lm