"""Unit tests for the TOC post-processing helpers in ``toc_processor``."""

import unittest

from toc_processor import merge_same_page_headers, clean_text, parse_raw_toc_output


class TestTOCProcessor(unittest.TestCase):
    """Exercise ``merge_same_page_headers`` and ``clean_text``.

    TOC entries in these tests are ``[level, title, page]`` lists.
    """

    def test_merge_same_page_headers(self):
        """A title split across two same-level, same-page entries is merged.

        Scenario: "American Government..." (page 31) is immediately followed
        by "Divided World" (page 31) at the same level.
        """
        input_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially", 31],
            [1, "Divided World", 31],
            [1, "Chapter 2", 57],
        ]
        expected_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially Divided World", 31],
            [1, "Chapter 2", 57],
        ]

        result = merge_same_page_headers(input_toc)

        # Surface the titles only when an assertion fails, instead of
        # printing them on every run.
        detail = (
            f"Input: {[e[1] for e in input_toc]} "
            f"Result: {[e[1] for e in result]}"
        )
        self.assertEqual(len(result), len(expected_toc), detail)
        # Only the merged entry's title and page are pinned, so the test does
        # not depend on the container type of the returned entries.
        self.assertEqual(result[1][1], expected_toc[1][1], detail)
        self.assertEqual(result[1][2], expected_toc[1][2], detail)

    def test_merge_same_page_headers_mixed_levels(self):
        """Level 1 followed by level 2 on the same page should NOT merge."""
        input_toc = [
            [1, "Chapter 1", 10],
            [2, "Section 1.1", 10],
        ]

        result = merge_same_page_headers(input_toc)

        self.assertEqual(len(result), 2)

    def test_clean_text(self):
        """``clean_text`` maps U+00A0 to a space and drops a trailing U+00AD."""
        dirty = "Hello\xa0World\xad"

        clean = clean_text(dirty)

        self.assertEqual(clean, "Hello World")


if __name__ == '__main__':
    unittest.main()