Spaces:
Running
Running
| // Copyright 2025 The ODML Authors. | |
| // | |
| // Licensed under the Apache License, Version 2.0 (the "License"); | |
| // you may not use this file except in compliance with the License. | |
| // You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
| namespace litert::lm { | |
| namespace { | |
| using ::testing::status::StatusIs; | |
| TEST(FakeLlmExecutorTest, ExecutorSettings) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3, 2}, {0, 0}}; | |
| FakeLlmExecutor fake_llm_executor(3, prefill_tokens_set, decode_tokens_set); | |
| EXPECT_OK(fake_llm_executor.GetExecutorSettings()); | |
| EXPECT_EQ(fake_llm_executor.GetExecutorSettings()->GetMaxNumTokens(), 1024); | |
| // Set the max num tokens to 100. | |
| fake_llm_executor.GetMutableExecutorSettings().value()->SetMaxNumTokens(100); | |
| EXPECT_EQ(fake_llm_executor.GetExecutorSettings()->GetMaxNumTokens(), 100); | |
| } | |
| TEST(FakeLlmExecutorTest, UpdateExecutorSettings) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3, 2}, {0, 0}}; | |
| FakeLlmExecutor fake_llm_executor(3, prefill_tokens_set, decode_tokens_set); | |
| ASSERT_OK_AND_ASSIGN(auto new_settings, | |
| fake_llm_executor.GetExecutorSettings()); | |
| new_settings.SetMaxNumTokens(200); | |
| // The default implementation should return OK. | |
| EXPECT_OK(fake_llm_executor.UpdateExecutorSettings(new_settings)); | |
| } | |
| TEST(FakeLlmExecutorTest, Prefill) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3, 2}, {0, 0}}; | |
| FakeLlmExecutor fake_llm_executor(3, prefill_tokens_set, decode_tokens_set); | |
| ExecutorInputs inputs; | |
| // Create a tensor buffer with 3 elements but only the first two elements | |
| // match the expected prefill tokens. | |
| const std::vector<int> input_tokens = {1, 2, 0}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| // Fail because the input tokens do not match the expected prefill tokens. | |
| EXPECT_THAT(fake_llm_executor.Prefill(inputs), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| // Succeed because the input tokens match the expected prefill tokens. | |
| auto ids_span = ReferTensorBufferAsSpan<int>(*(*inputs.GetTextTokenIdsPtr())); | |
| (*ids_span)[2] = 3; | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| } | |
| TEST(FakeLlmExecutorTest, PrefillWithAudio) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3, 2}, {0, 0}}; | |
| std::vector<float> audio_embeddings_set = {1.0f, 2.0f, 3.0f, 4.0f}; | |
| FakeLlmExecutor fake_llm_executor(3, prefill_tokens_set, decode_tokens_set, | |
| /*batch_size=*/1, audio_embeddings_set); | |
| ExecutorInputs inputs; | |
| // Create a tensor buffer with 3 elements but only the first two elements | |
| // match the expected prefill tokens. | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| const std::vector<float> input_audio_embedding = {1.0f, 2.0f, 3.0f, 0.0f}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_audio_embedding_buffer, | |
| CopyToTensorBuffer<float>(absl::MakeSpan(input_audio_embedding), | |
| {1, 4, 1})); | |
| inputs.SetAudioData( | |
| ExecutorAudioData(std::move(input_audio_embedding_buffer), std::nullopt)); | |
| // Fail because the input audio embedding does not match the expected the | |
| // audio embedding set. | |
| EXPECT_THAT(fake_llm_executor.Prefill(inputs), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| // Succeed because the input audio embedding matches the expected audio | |
| // embedding set. | |
| auto audio_embedding_span = | |
| ReferTensorBufferAsSpan<float>(*(*inputs.GetAudioEmbeddingsPtr())); | |
| (*audio_embedding_span)[3] = 4.0f; | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeWithoutPrefillFailed) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/4, prefill_tokens_set, | |
| decode_tokens_set); | |
| EXPECT_THAT(fake_llm_executor.Decode(), | |
| StatusIs(absl::StatusCode::kFailedPrecondition)); | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeToIds) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(4, prefill_tokens_set, decode_tokens_set); | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| // Call Decode for the 1st time. The output tokens should be the 1st decode | |
| // tokens: 3. | |
| ASSERT_OK_AND_ASSIGN(auto output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 4); | |
| EXPECT_EQ(output_tokens[0][0], 3); | |
| // Call Decode for the 2nd time. The output tokens should be the 2nd decode | |
| // tokens: 0. | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 5); | |
| EXPECT_EQ(output_tokens[0][0], 0); | |
| // Call Decode for the 3nd time. Should fail. | |
| EXPECT_THAT(fake_llm_executor.Decode(), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeToLogits) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/4, prefill_tokens_set, | |
| decode_tokens_set); | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| // Create a tensor buffer with 3 elements but only the first two elements | |
| // match the expected prefill tokens. | |
| const std::vector<int> decode_input_tokens = {3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto decode_input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(decode_input_tokens), {1, 1})); | |
| inputs.SetTextData(ExecutorTextData(std::move(decode_input_tokens_buffer))); | |
| auto output_logits = CreateTensorBuffer<float>({1, 1, 4}); | |
| // Call Decode for the 1st time. The output logits should have values: | |
| // [-inf, -inf, -inf, inf]. | |
| EXPECT_OK(fake_llm_executor.Decode(inputs, *output_logits)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 4); | |
| auto output_logits_span = ReferTensorBufferAsSpan<float>(*output_logits); | |
| EXPECT_LE((*output_logits_span)[0], 0.0f); | |
| EXPECT_LE((*output_logits_span)[1], 0.0f); | |
| EXPECT_LE((*output_logits_span)[2], 0.0f); | |
| EXPECT_GE((*output_logits_span)[3], 0.0f); | |
| // Call Decode for the 2nd time. The output logits should have values: | |
| // [inf, -inf, -inf, -inf]. | |
| EXPECT_OK(fake_llm_executor.Decode(inputs, *output_logits)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 5); | |
| EXPECT_GE((*output_logits_span)[0], 0.0f); | |
| EXPECT_LE((*output_logits_span)[1], 0.0f); | |
| EXPECT_LE((*output_logits_span)[2], 0.0f); | |
| EXPECT_LE((*output_logits_span)[3], 0.0f); | |
| // Call Decode for the 3nd time. Should fail. | |
| EXPECT_THAT(fake_llm_executor.Decode(inputs, *output_logits), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeLogits) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/4, prefill_tokens_set, | |
| decode_tokens_set); | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| // Create a tensor buffer with 3 elements but only the first two elements | |
| // match the expected prefill tokens. | |
| const std::vector<int> decode_input_tokens = {3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto decode_input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(decode_input_tokens), {1, 1})); | |
| inputs.SetTextData(ExecutorTextData(std::move(decode_input_tokens_buffer))); | |
| auto output_logits = fake_llm_executor.DecodeLogits(inputs); | |
| // Call Decode for the 1st time. The output logits should have values: | |
| // [-inf, -inf, -inf, inf]. | |
| EXPECT_TRUE(output_logits.ok()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 4); | |
| auto output_logits_span = ReferTensorBufferAsSpan<float>(*output_logits); | |
| EXPECT_LE((*output_logits_span)[0], 0.0f); | |
| EXPECT_LE((*output_logits_span)[1], 0.0f); | |
| EXPECT_LE((*output_logits_span)[2], 0.0f); | |
| EXPECT_GE((*output_logits_span)[3], 0.0f); | |
| output_logits = fake_llm_executor.DecodeLogits(inputs); | |
| // Call Decode for the 2nd time. The output logits should have values: | |
| // [inf, -inf, -inf, -inf]. | |
| EXPECT_TRUE(output_logits.ok()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 5); | |
| output_logits_span = ReferTensorBufferAsSpan<float>(*output_logits); | |
| EXPECT_GE((*output_logits_span)[0], 0.0f); | |
| EXPECT_LE((*output_logits_span)[1], 0.0f); | |
| EXPECT_LE((*output_logits_span)[2], 0.0f); | |
| EXPECT_LE((*output_logits_span)[3], 0.0f); | |
| // Call Decode for the 3nd time. Should fail. | |
| EXPECT_THAT(fake_llm_executor.Decode(inputs, *output_logits), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeDelay) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{3}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/4, prefill_tokens_set, | |
| decode_tokens_set); | |
| constexpr absl::Duration delay = absl::Milliseconds(100); | |
| fake_llm_executor.SetDecodeDelay(delay); | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| const absl::Time start = absl::Now(); | |
| ASSERT_OK_AND_ASSIGN(auto output_tokens, fake_llm_executor.Decode()); | |
| const absl::Duration elapsed = absl::Now() - start; | |
| EXPECT_GE(elapsed, delay); | |
| } | |
| TEST(FakeLlmExecutorTest, MultiplePrefillTriggers) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}, {4, 5}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{6}, {7}, {8}, {9}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/10, prefill_tokens_set, | |
| decode_tokens_set); | |
| // Trigger the first prefill/decode sequence. | |
| { | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| ASSERT_OK_AND_ASSIGN(auto output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 4); | |
| EXPECT_EQ(output_tokens[0][0], 6); | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 5); | |
| EXPECT_EQ(output_tokens[0][0], 7); | |
| } | |
| // Trigger the second prefill/decode sequence. | |
| { | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {4, 5}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 2})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 7); | |
| ASSERT_OK_AND_ASSIGN(auto output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 8); | |
| EXPECT_EQ(output_tokens[0][0], 8); | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode()); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 9); | |
| EXPECT_EQ(output_tokens[0][0], 9); | |
| } | |
| // Call Prefill for the 3rd time. Should fail. | |
| { | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {6}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 1})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_THAT(fake_llm_executor.Prefill(inputs), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| } | |
| } | |
| TEST(FakeLlmExecutorTest, DecodeWithConstraint) { | |
| const std::vector<std::vector<int>> prefill_tokens_set = {{1, 2, 3}}; | |
| const std::vector<std::vector<int>> decode_tokens_set = {{4}, {0}, {4}, {0}}; | |
| FakeLlmExecutor fake_llm_executor(/*vocab_size=*/10, prefill_tokens_set, | |
| decode_tokens_set); | |
| // Fake constraint that expects "4, 0". | |
| const std::vector<int> expected_token_ids = {4, 0}; | |
| auto constraint = FakeConstraint(expected_token_ids, /*vocabulary_size=*/10); | |
| ExecutorInputs inputs; | |
| const std::vector<int> input_tokens = {1, 2, 3}; | |
| LITERT_ASSERT_OK_AND_ASSIGN( | |
| auto input_tokens_buffer, | |
| CopyToTensorBuffer<int>(absl::MakeSpan(input_tokens), {1, 3})); | |
| inputs.SetTextData(ExecutorTextData(std::move(input_tokens_buffer))); | |
| EXPECT_OK(fake_llm_executor.Prefill(inputs)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 3); | |
| auto constrained_decoder = | |
| std::make_unique<ConstrainedDecoder>(&constraint, | |
| /*num_output_candidates=*/1); | |
| auto decode_params = ExecutorDecodeParams(); | |
| decode_params.SetConstraintDecoder(constrained_decoder.get()); | |
| // Call Decode for the 1st time. The output tokens should be the 1st decode | |
| // tokens: 4. (first constraint token) | |
| ASSERT_OK_AND_ASSIGN(auto output_tokens, | |
| fake_llm_executor.Decode(decode_params)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 4); | |
| EXPECT_EQ(output_tokens[0][0], 4); | |
| // Call Decode for the 2nd time. The output tokens should be the 2nd decode | |
| // tokens: 0. (second constraint token) | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode(decode_params)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 5); | |
| EXPECT_EQ(output_tokens[0][0], 0); | |
| // Call Decode for the 3rd time. The output tokens should be the 3rd decode | |
| // tokens: 4. (first constraint token again) | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode(decode_params)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 6); | |
| EXPECT_EQ(output_tokens[0][0], 4); | |
| // Call Decode for the 2nd time. The output tokens should be the 2nd decode | |
| // tokens: 0. (second constraint token again) | |
| ASSERT_OK_AND_ASSIGN(output_tokens, fake_llm_executor.Decode(decode_params)); | |
| EXPECT_EQ(fake_llm_executor.GetCurrentStep().value(), 7); | |
| EXPECT_EQ(output_tokens[0][0], 0); | |
| // Call Decode for the 5nd time. Should fail. | |
| EXPECT_THAT(fake_llm_executor.Decode(decode_params), | |
| StatusIs(absl::StatusCode::kInvalidArgument)); | |
| } | |
| } // namespace | |
| } // namespace litert::lm | |