Merge pull request #112 from codellm-devkit/108-unnecessary-dependenc… · codellm-devkit/python-sdk@f706c1b · GitHub
[go: up one dir, main page]

Skip to content

Commit f706c1b

Browse files
authored
Merge pull request #112 from codellm-devkit/108-unnecessary-dependency-on-clangllvm-for-non-cc-analysis
Fix issue 108: Unnecessary dependency on clang/llvm for non c/cpp analysis
2 parents 4ff354f + 2d3df9b commit f706c1b

File tree

21 files changed

+578
-518
lines changed

21 files changed

+578
-518
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ lint: ## Run the linter
3131
.PHONY: test
3232
test: ## Run the unit tests
3333
$(info Running tests...)
34-
pytest --pspec --cov=cldk --cov-fail-under=70 --disable-warnings
34+
pytest --pspec --cov=cldk --cov-fail-under=75 --disable-warnings
3535

3636
##@ Build
3737

cldk/analysis/c/c_analysis.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
Analysis model for C projects
1919
"""
2020

21+
import os
2122
from pathlib import Path
2223
from typing import Dict, List, Optional
2324
import networkx as nx
2425

25-
2626
from cldk.analysis.c.clang import ClangAnalyzer
2727
from cldk.models.c import CApplication, CFunction, CTranslationUnit, CMacro, CTypedef, CStruct, CEnum, CVariable
2828

@@ -36,7 +36,7 @@ def __init__(self, project_dir: Path) -> None:
3636
self.c_application = self._init_application(project_dir)
3737

3838
def _init_application(self, project_dir: Path) -> CApplication:
39-
"""Initializes the C application object.
39+
"""Should initialize the C application object.
4040
4141
Args:
4242
project_dir (Path): Path to the project directory.
@@ -56,7 +56,7 @@ def _init_application(self, project_dir: Path) -> CApplication:
5656
return CApplication(translation_units=translation_units)
5757

5858
def get_c_application(self) -> CApplication:
59-
"""returns the C application object.
59+
"""Obtain the C application object.
6060
6161
Returns:
6262
CApplication: C application object.
@@ -90,15 +90,15 @@ def is_parsable(self, source_code: str) -> bool:
9090
raise NotImplementedError("Support for this functionality has not been implemented yet.")
9191

9292
def get_call_graph(self) -> nx.DiGraph:
93-
"""returns the call graph of the C code.
93+
"""Should return the call graph of the C code.
9494
9595
Returns:
9696
nx.DiGraph: The call graph of the C code.
9797
"""
9898
raise NotImplementedError("Support for this functionality has not been implemented yet.")
9999

100100
def get_call_graph_json(self) -> str:
101-
"""returns a serialized call graph in json.
101+
"""Should return a serialized call graph in json.
102102
103103
Raises:
104104
NotImplementedError: Raised when this functionality is not suported.
@@ -110,7 +110,7 @@ def get_call_graph_json(self) -> str:
110110
raise NotImplementedError("Producing a call graph over a single file is not implemented yet.")
111111

112112
def get_callers(self, function: CFunction) -> Dict:
113-
"""returns a dictionary of callers of the target method.
113+
"""Should return a dictionary of callers of the target method.
114114
115115
Args:
116116
function (CFunction): A CFunction object.
@@ -125,7 +125,7 @@ def get_callers(self, function: CFunction) -> Dict:
125125
raise NotImplementedError("Generating all callers over a single file is not implemented yet.")
126126

127127
def get_callees(self, function: CFunction) -> Dict:
128-
"""returns a dictionary of callees in a fuction.
128+
"""Should return a dictionary of callees in a fuction.
129129
130130
Args:
131131
function (CFunction): A CFunction object.
@@ -139,7 +139,7 @@ def get_callees(self, function: CFunction) -> Dict:
139139
raise NotImplementedError("Generating all callees over a single file is not implemented yet.")
140140

141141
def get_functions(self) -> Dict[str, CFunction]:
142-
"""returns all functions in the project.
142+
"""Should return all functions in the project.
143143
144144
Raises:
145145
NotImplementedError: Raised when current AnalysisEngine does not support this function.
@@ -151,7 +151,7 @@ def get_functions(self) -> Dict[str, CFunction]:
151151
return translation_unit.functions
152152

153153
def get_function(self, function_name: str, file_name: Optional[str]) -> CFunction | List[CFunction]:
154-
"""returns a function object given the function name.
154+
"""Should return a function object given the function name.
155155
156156
Args:
157157
function_name (str): The name of the function.
@@ -163,7 +163,7 @@ def get_function(self, function_name: str, file_name: Optional[str]) -> CFunctio
163163
raise NotImplementedError("Support for this functionality has not been implemented yet.")
164164

165165
def get_C_file(self, file_name: str) -> str:
166-
"""returns a class given qualified class name.
166+
"""Should return a class given qualified class name.
167167
168168
Args:
169169
file_name (str): The name of the file.
@@ -191,7 +191,7 @@ def get_C_compilation_unit(self, file_path: str) -> CTranslationUnit:
191191
return self.c_application.translation_units.get(file_path)
192192

193193
def get_functions_in_file(self, file_name: str) -> List[CFunction]:
194-
"""returns a dictionary of all methods of the given class.
194+
"""Should return a dictionary of all methods of the given class.
195195
196196
Args:
197197
file_name (str): The name of the file.
@@ -205,7 +205,7 @@ def get_functions_in_file(self, file_name: str) -> List[CFunction]:
205205
raise NotImplementedError("Support for this functionality has not been implemented yet.")
206206

207207
def get_macros(self) -> List[CMacro]:
208-
"""returns a list of all macros in the C code.
208+
"""Should return a list of all macros in the C code.
209209
210210
Raises:
211211
NotImplementedError: Raised when current AnalysisEngine does not support this function.
@@ -216,7 +216,7 @@ def get_macros(self) -> List[CMacro]:
216216
raise NotImplementedError("Support for this functionality has not been implemented yet.")
217217

218218
def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
219-
"""returns a list of all macros in the given file.
219+
"""Should return a list of all macros in the given file.
220220
221221
Args:
222222
file_name (str): The name of the file.
@@ -231,7 +231,7 @@ def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
231231

232232

233233
def get_includes(self) -> List[str]:
234-
"""returns a list of all include statements across all files in the C code.
234+
"""Should return a list of all include statements across all files in the C code.
235235
236236
Returns:
237237
List[str]: A list of all include statements. Returns empty list if none found.
@@ -243,7 +243,7 @@ def get_includes(self) -> List[str]:
243243

244244

245245
def get_includes_in_file(self, file_name: str) -> List[str] | None:
246-
"""returns a list of all include statements in the given file.
246+
"""Should return a list of all include statements in the given file.
247247
248248
Args:
249249
file_name (str): The name of the file to search in.
@@ -257,7 +257,7 @@ def get_includes_in_file(self, file_name: str) -> List[str] | None:
257257

258258

259259
def get_macros(self) -> List[CMacro]:
260-
"""returns a list of all macro definitions across all files in the C code.
260+
"""Should return a list of all macro definitions across all files in the C code.
261261
262262
Returns:
263263
List[CMacro]: A list of all macro definitions. Returns empty list if none found.
@@ -269,7 +269,7 @@ def get_macros(self) -> List[CMacro]:
269269

270270

271271
def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
272-
"""returns a list of all macro definitions in the given file.
272+
"""Should return a list of all macro definitions in the given file.
273273
274274
Args:
275275
file_name (str): The name of the file to search in.
@@ -283,7 +283,7 @@ def get_macros_in_file(self, file_name: str) -> List[CMacro] | None:
283283

284284

285285
def get_typedefs(self) -> List[CTypedef]:
286-
"""returns a list of all typedef declarations across all files in the C code.
286+
"""Should return a list of all typedef declarations across all files in the C code.
287287
288288
Returns:
289289
List[CTypedef]: A list of all typedef declarations. Returns empty list if none found.
@@ -295,7 +295,7 @@ def get_typedefs(self) -> List[CTypedef]:
295295

296296

297297
def get_typedefs_in_file(self, file_name: str) -> List[CTypedef] | None:
298-
"""returns a list of all typedef declarations in the given file.
298+
"""Should return a list of all typedef declarations in the given file.
299299
300300
Args:
301301
file_name (str): The name of the file to search in.
@@ -309,7 +309,7 @@ def get_typedefs_in_file(self, file_name: str) -> List[CTypedef] | None:
309309

310310

311311
def get_structs(self) -> List[CStruct]:
312-
"""returns a list of all struct/union declarations across all files in the C code.
312+
"""Should return a list of all struct/union declarations across all files in the C code.
313313
314314
Returns:
315315
List[CStruct]: A list of all struct/union declarations. Returns empty list if none found.
@@ -321,7 +321,7 @@ def get_structs(self) -> List[CStruct]:
321321

322322

323323
def get_structs_in_file(self, file_name: str) -> List[CStruct] | None:
324-
"""returns a list of all struct/union declarations in the given file.
324+
"""Should return a list of all struct/union declarations in the given file.
325325
326326
Args:
327327
file_name (str): The name of the file to search in.
@@ -335,7 +335,7 @@ def get_structs_in_file(self, file_name: str) -> List[CStruct] | None:
335335

336336

337337
def get_enums(self) -> List[CEnum]:
338-
"""returns a list of all enum declarations across all files in the C code.
338+
"""Should return a list of all enum declarations across all files in the C code.
339339
340340
Returns:
341341
List[CEnum]: A list of all enum declarations. Returns empty list if none found.
@@ -347,7 +347,7 @@ def get_enums(self) -> List[CEnum]:
347347

348348

349349
def get_enums_in_file(self, file_name: str) -> List[CEnum] | None:
350-
"""returns a list of all enum declarations in the given file.
350+
"""Should return a list of all enum declarations in the given file.
351351
352352
Args:
353353
file_name (str): The name of the file to search in.
@@ -361,7 +361,7 @@ def get_enums_in_file(self, file_name: str) -> List[CEnum] | None:
361361

362362

363363
def get_globals(self, file_name: str) -> List[CVariable] | None:
364-
"""returns a list of all global variable declarations in the given file.
364+
"""Should return a list of all global variable declarations in the given file.
365365
366366
Args:
367367
file_name (str): The name of the file to search in.

cldk/analysis/c/clang/clang_analyzer.py

Lines changed: 62 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from pdb import set_trace
23
import platform
34
from clang.cindex import Config
45
from pathlib import Path
@@ -12,75 +13,79 @@
1213

1314
# First, we only import Config from clang.cindex
1415
from clang.cindex import Config
15-
16-
17-
def find_libclang() -> str:
18-
"""
19-
Locates the libclang library on the system based on the operating system.
20-
This function runs before any other Clang functionality is used, ensuring
21-
proper initialization of the Clang environment.
22-
"""
23-
system = platform.system()
24-
25-
# On macOS, we check both Apple Silicon and Intel paths
26-
if system == "Darwin":
27-
possible_paths = [
28-
"/opt/homebrew/opt/llvm/lib/libclang.dylib", # Apple Silicon
29-
"/usr/local/opt/llvm/lib/libclang.dylib", # Intel Mac
30-
"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
31-
]
32-
install_instructions = "Install LLVM using: brew install llvm"
33-
34-
# On Linux, we check various common installation paths
35-
elif system == "Linux":
36-
from pathlib import Path
37-
38-
lib_paths = [Path("/usr/lib"), Path("/usr/lib64")]
39-
possible_paths = [
40-
str(p) for base in lib_paths if base.exists()
41-
for p in base.rglob("libclang*.so*")
42-
]
43-
44-
install_instructions = "Install libclang development package using your system's package manager"
45-
else:
46-
raise RuntimeError(f"Unsupported operating system: {system}")
47-
48-
# Check each possible path and return the first one that exists
49-
for path in possible_paths:
50-
if os.path.exists(path):
51-
logger.info(f"Found libclang at: {path}")
52-
return path
53-
54-
# If no library is found, provide clear installation instructions
55-
raise RuntimeError(f"Could not find libclang library. \n" f"Please ensure LLVM is installed:\n{install_instructions}")
56-
57-
58-
# Initialize libclang at module level
59-
try:
60-
libclang_path = find_libclang()
61-
Config.set_library_file(libclang_path)
62-
logger.info("Successfully initialized libclang")
63-
64-
# Now that libclang is initialized, we can safely import other Clang components
65-
from clang.cindex import Index, TranslationUnit, CursorKind, TypeKind, CompilationDatabase
66-
67-
except Exception as e:
68-
logger.error(f"Failed to initialize libclang: {e}")
69-
raise
16+
from clang.cindex import Index, TranslationUnit, CursorKind, TypeKind, CompilationDatabase
7017

7118

7219
class ClangAnalyzer:
7320
"""Analyzes C code using Clang's Python bindings."""
7421

7522
def __init__(self, compilation_database_path: Optional[Path] = None):
76-
# Configure Clang before creating the Index
23+
# # Let's turn off Address sanitization for parsing code
24+
# # Initialize libclang at module level
25+
# try:
26+
if platform.system() == "Darwin":
27+
possible_paths = [
28+
"/opt/homebrew/opt/llvm/lib/libclang.dylib", # Apple Silicon
29+
"/usr/local/opt/llvm/lib/libclang.dylib", # Intel Mac
30+
"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
31+
]
32+
33+
# We could not find libclang. Raise an error and provide instructions.
34+
if len(possible_paths) == 0:
35+
raise RuntimeError("Install LLVM 18 using: brew install llvm@18")
36+
37+
# Check each possible path and return the first one that exists
38+
for path in possible_paths:
39+
if os.path.exists(path):
40+
logger.info(f"Found libclang at: {path}")
41+
# Configure Clang before creating the Index
42+
Config.set_library_file(path)
43+
7744
self.index = Index.create()
7845
self.compilation_database = None
7946
# TODO: Implement compilation database for C/C++ projects so that we can get compile arguments for each file
8047
# and parse them correctly. This is useful for projects with complex build systems.
8148
if compilation_database_path:
8249
self.compilation_database = CompilationDatabase.fromDirectory(str(compilation_database_path))
8350

51+
def __find_libclang(self) -> str:
52+
"""
53+
Locates the libclang library on the system based on the operating system.
54+
This function runs before any other Clang functionality is used, ensuring
55+
proper initialization of the Clang environment.
56+
"""
57+
58+
system = platform.system()
59+
60+
# On macOS, we check both Apple Silicon and Intel paths
61+
if system == "Darwin":
62+
possible_paths = [
63+
"/opt/homebrew/opt/llvm/lib/libclang.dylib", # Apple Silicon
64+
"/usr/local/opt/llvm/lib/libclang.dylib", # Intel Mac
65+
"/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/libclang.dylib",
66+
]
67+
install_instructions = "Install LLVM using: brew install llvm"
68+
69+
# On Linux, we check various common installation paths
70+
elif system == "Linux":
71+
from pathlib import Path
72+
73+
lib_paths = [Path("/usr/lib"), Path("/usr/lib64")]
74+
possible_paths = [str(p) for base in lib_paths if base.exists() for p in base.rglob("libclang*.so.17*")]
75+
print(possible_paths)
76+
install_instructions = "Install libclang development package using your system's package manager"
77+
else:
78+
raise RuntimeError(f"Unsupported operating system: {system}")
79+
80+
# Check each possible path and return the first one that exists
81+
for path in possible_paths:
82+
if os.path.exists(path):
83+
logger.info(f"Found libclang at: {path}")
84+
return path
85+
86+
# If no library is found, provide clear installation instructions
87+
raise RuntimeError(f"Could not find libclang library. \n" f"Please ensure LLVM is installed:\n{install_instructions}")
88+
8489
def analyze_file(self, file_path: Path) -> CTranslationUnit:
8590
"""Analyzes a single C source file using Clang."""
8691

@@ -105,7 +110,7 @@ def analyze_file(self, file_path: Path) -> CTranslationUnit:
105110
return translation_unit
106111

107112
def _process_translation_unit(self, cursor, translation_unit: CTranslationUnit):
108-
"""Processes all declarations in a translation unit."""
113+
"""Should process all declarations in a translation unit."""
109114

110115
for child in cursor.get_children():
111116
if child.location.file and str(child.location.file) != translation_unit.file_path:

0 commit comments

Comments
 (0)
0