import json
import sys
import time

from docx import Document
from pdfminer.high_level import extract_text

import abc_1


# Fallback category -> keyword-list mapping used when no JSON argument is given.
DEFAULT_CATEGORIES_KEYWORDS = {
    'AI': ['Artificial', 'Intelligence'],
    'Automata': ['finite', 'state', 'machines'],
    'DT': ['game', 'theory'],
}

# Passed unchanged to abc_1.multi_process_categorizer.
# NOTE(review): presumably directory paths relative to the working
# directory -- confirm against abc_1.
INPUT_PATH = 'input'
OUTPUT_PATH = 'output'


def load_categories(argv):
    """Return the category -> keywords mapping selected by the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout; ``argv[1]``, when
            present, is a JSON object mapping category names to keywords.

    Returns:
        The decoded mapping from ``argv[1]``, or a fresh copy of
        ``DEFAULT_CATEGORIES_KEYWORDS`` (after printing a notice) when no
        argument was supplied.

    Raises:
        ValueError: If ``argv[1]`` is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or does not decode to an object.
    """
    if len(argv) > 1:
        categories = json.loads(argv[1])
        if not isinstance(categories, dict):
            raise ValueError(
                "expected a JSON object mapping categories to keywords, "
                f"got {type(categories).__name__}"
            )
        return categories

    print("No data provided.")
    # Copy so callers can never mutate the module-level defaults.
    return {name: list(words) for name, words in DEFAULT_CATEGORIES_KEYWORDS.items()}


def main():
    """Run the categorizer over INPUT_PATH and report the elapsed time."""
    # perf_counter is monotonic, so the elapsed time cannot go negative or
    # jump if the wall clock is adjusted while the job runs.
    start = time.perf_counter()

    try:
        categories_keywords = load_categories(sys.argv)
    except ValueError as exc:
        # Bad user input: exit with a readable message, not a traceback.
        sys.exit(f"Invalid categories JSON: {exc}")

    # The mapping actually chosen above is what gets compiled; previously
    # the parsed command-line argument was discarded.
    compiled_keywords = abc_1.compile_keywords(categories_keywords)
    abc_1.multi_process_categorizer(
        INPUT_PATH, OUTPUT_PATH, compiled_keywords, num_processes=8
    )

    end = time.perf_counter()
    print(f"Categorization completed in {end - start:.2f} seconds")


# The guard stays at module level: with the "spawn" start method, worker
# processes re-import this module and must not re-run the job.
if __name__ == '__main__':
    main()