| |
|
| |
|
| |
|
| | import os
|
| | from .parser import DFG_python,DFG_java,DFG_ruby,DFG_go,DFG_php,DFG_javascript,DFG_csharp
|
| | from .parser import (remove_comments_and_docstrings,
|
| | tree_to_token_index,
|
| | index_to_code_token,
|
| | tree_to_variable_index)
|
| | from tree_sitter import Language, Parser
|
| |
|
| | dfg_function={
|
| | 'python':DFG_python,
|
| | 'java':DFG_java,
|
| | 'ruby':DFG_ruby,
|
| | 'go':DFG_go,
|
| | 'php':DFG_php,
|
| | 'javascript':DFG_javascript,
|
| | 'c_sharp':DFG_csharp,
|
| | }
|
| |
|
| | def calc_syntax_match(references, candidate, lang):
|
| | return corpus_syntax_match([references], [candidate], lang)
|
| |
|
| | def corpus_syntax_match(references, candidates, lang):
|
| |
|
| | JAVA_LANGUAGE = Language(os.path.abspath(os.path.dirname(__file__)) + '/parser/my-languages.so', lang)
|
| | parser = Parser()
|
| | parser.set_language(JAVA_LANGUAGE)
|
| | match_count = 0
|
| | total_count = 0
|
| |
|
| | for i in range(len(candidates)):
|
| | references_sample = references[i]
|
| | candidate = candidates[i]
|
| | for reference in references_sample:
|
| | try:
|
| | candidate=remove_comments_and_docstrings(candidate,'java')
|
| | except:
|
| | pass
|
| | try:
|
| | reference=remove_comments_and_docstrings(reference,'java')
|
| | except:
|
| | pass
|
| |
|
| | candidate_tree = parser.parse(bytes(candidate,'utf8')).root_node
|
| |
|
| | reference_tree = parser.parse(bytes(reference,'utf8')).root_node
|
| |
|
| | def get_all_sub_trees(root_node):
|
| | node_stack = []
|
| | sub_tree_sexp_list = []
|
| | depth = 1
|
| | node_stack.append([root_node, depth])
|
| | while len(node_stack) != 0:
|
| | cur_node, cur_depth = node_stack.pop()
|
| | sub_tree_sexp_list.append([cur_node.sexp(), cur_depth])
|
| | for child_node in cur_node.children:
|
| | if len(child_node.children) != 0:
|
| | depth = cur_depth + 1
|
| | node_stack.append([child_node, depth])
|
| | return sub_tree_sexp_list
|
| | cand_sexps = [x[0] for x in get_all_sub_trees(candidate_tree)]
|
| | ref_sexps = get_all_sub_trees(reference_tree)
|
| |
|
| |
|
| |
|
| |
|
| | for sub_tree, depth in ref_sexps:
|
| | if sub_tree in cand_sexps:
|
| | match_count += 1
|
| | total_count += len(ref_sexps)
|
| |
|
| | score = match_count / total_count
|
| | return score
|
| |
|