-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetermine_language.py
46 lines (36 loc) · 1.79 KB
/
determine_language.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class CodeLanguageIdentifier:
    """
    Identifies the programming language of a code block.

    Each known language has a list of characteristic keywords / syntax
    fragments. A code block scores one point per fragment that occurs in it,
    and the language with the most points wins.

    Attributes:
        languages_patterns (dict): Maps a language name to the list of pattern
            strings associated with that language. Patterns are literal
            substrings unless they are listed in ``_REGEX_PATTERNS``, in which
            case they are evaluated as regular expressions.
    """

    # Entries of ``languages_patterns`` that must be evaluated with
    # ``re.search`` instead of a plain substring test. Everything else is
    # literal, because many literals ('print(', 'int main()') are not valid or
    # not equivalent when interpreted as regular expressions.
    _REGEX_PATTERNS = frozenset({r'(?<!:):(?!:)'})

    def __init__(self):
        """Set up the default language -> patterns table."""
        self.languages_patterns = {
            # The Python colon pattern is a regex matching a single ':' that is
            # not part of '::', so C++/Rust scope operators do not count as
            # Python block/annotation colons.
            'Python': ['def ', 'import ', 'from ', 'class ', r'(?<!:):(?!:)', 'print(', 'lambda '],
            'JavaScript': ['function ', '=>', 'var ', 'let ', 'const ', 'console.log('],
            'Java': ['public class', 'public static void main', 'import java.', 'new '],
            'C++': ['#include ', 'int main()', 'std::', 'cout <<', 'cin >>'],
            'Rust': ['fn ', 'let ', 'mut ', 'match ', 'trait ', 'enum '],
            'Kotlin': ['fun ', 'val ', 'var ', 'println(', 'import '],
        }

    def identify_language(self, code_block):
        """
        Identifies the programming language of a code block.

        Args:
            code_block (str): The code block string.

        Returns:
            str: The language with the highest score, or "Unknown" when the
                input is empty/None or no pattern matches. On a tie, the
                language listed first in ``languages_patterns`` wins.
        """
        # Function-scope import keeps this class self-contained regardless of
        # what the surrounding module imports.
        import re

        # Nothing to inspect: no pattern can match.
        if not code_block:
            return "Unknown"

        scores = {}
        for language, patterns in self.languages_patterns.items():
            hits = 0
            for pattern in patterns:
                if pattern in self._REGEX_PATTERNS:
                    # Regex entry: a substring test would look for the regex
                    # source text itself and never match real code.
                    matched = re.search(pattern, code_block) is not None
                else:
                    matched = pattern in code_block
                if matched:
                    hits += 1
            scores[language] = hits

        # max() returns the first key with the highest score, so ties resolve
        # in dictionary insertion order.
        identified_language = max(scores, key=scores.get)
        if scores[identified_language] == 0:
            return "Unknown"
        return identified_language