Merge pull request #6254 from rvraghav93/cythonize_when_pxd_changes · scikit-learn/scikit-learn@bf9cd88 · GitHub
[go: up one dir, main page]

Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bf9cd88

Browse files
committed
Merge pull request #6254 from rvraghav93/cythonize_when_pxd_changes
[MRG+2] ENH/MAINT Check for changes in pxd files too. Cleanup code
2 parents a7038a4 + b93ccb9 commit bf9cd88

File tree

1 file changed

+89
-74
lines changed

1 file changed

+89
-74
lines changed

sklearn/_build_utils/cythonize.py

Lines changed: 89 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
corresponding C files. If they have, then runs cython on these files to
1212
recreate the C files.
1313
14-
The script detects changes in the pyx files using checksums [or hashes] stored
15-
in a database file
14+
The script detects changes in the pyx/pxd files using checksums
15+
[or hashes] stored in a database file
1616
1717
Simple script to invoke Cython on all .pyx
1818
files; while waiting for a proper build system. Uses file hashes to
@@ -27,10 +27,13 @@
2727
We copied it for scikit-learn.
2828
2929
Note: this script does not check any of the dependent C libraries; it only
30-
operates on the Cython .pyx files.
30+
operates on the Cython .pyx files or their corresponding Cython header (.pxd)
31+
files.
3132
"""
32-
# author: Arthur Mensch
33-
# license: BSD
33+
# Author: Arthur Mensch <arthur.mensch@inria.fr>
34+
# Author: Raghav R V <rvraghav93@gmail.com>
35+
#
36+
# License: BSD 3 clause
3437

3538
from __future__ import division, print_function, absolute_import
3639

@@ -50,10 +53,7 @@
5053
WindowsError = None
5154

5255

53-
#
54-
# Rules
55-
#
56-
def process_pyx(fromfile, tofile):
56+
def cythonize(cython_file, gen_file):
5757
try:
5858
from Cython.Compiler.Version import version as cython_version
5959
from distutils.version import LooseVersion
@@ -64,53 +64,53 @@ def process_pyx(fromfile, tofile):
6464
pass
6565

6666
flags = ['--fast-fail']
67-
if tofile.endswith('.cpp'):
67+
if gen_file.endswith('.cpp'):
6868
flags += ['--cplus']
6969

7070
try:
7171
try:
72-
r = subprocess.call(['cython'] + flags + ["-o", tofile, fromfile])
73-
if r != 0:
74-
raise Exception('Cython failed')
72+
rc = subprocess.call(['cython'] +
73+
flags + ["-o", gen_file, cython_file])
74+
if rc != 0:
75+
raise Exception('Cythonizing %s failed' % cython_file)
7576
except OSError:
7677
# There are ways of installing Cython that don't result in a cython
7778
# executable on the path, see scipy issue gh-2397.
78-
r = subprocess.call([sys.executable, '-c',
79-
'import sys; from Cython.Compiler.Main '
80-
'import setuptools_main as main;'
81-
' sys.exit(main())'] + flags +
82-
["-o", tofile, fromfile])
83-
if r != 0:
84-
raise Exception('Cython failed')
79+
rc = subprocess.call([sys.executable, '-c',
80+
'import sys; from Cython.Compiler.Main '
81+
'import setuptools_main as main;'
82+
' sys.exit(main())'] + flags +
83+
["-o", gen_file, cython_file])
84+
if rc != 0:
85+
raise Exception('Cythonizing %s failed' % cython_file)
8586
except OSError:
8687
raise OSError('Cython needs to be installed')
8788

8889

89-
rules = {
90-
'.pyx': process_pyx,
91-
}
92-
93-
94-
#
95-
# Hash db
96-
#
9790
def load_hashes(filename):
98-
# Return { filename : (sha1 of input, sha1 of output) }
99-
if os.path.isfile(filename):
100-
hashes = {}
101-
with open(filename, 'r') as f:
102-
for line in f:
103-
filename, inhash, outhash = line.split()
104-
hashes[filename] = (inhash, outhash)
105-
else:
91+
"""Load the hashes dict from the hashfile"""
92+
# { filename : (sha1 of header if available or 'NA',
93+
# sha1 of input,
94+
# sha1 of output) }
95+
96+
hashes = {}
97+
try:
98+
with open(filename, 'r') as cython_hash_file:
99+
for hash_record in cython_hash_file:
100+
(filename, header_hash,
101+
cython_hash, gen_file_hash) = hash_record.split()
102+
hashes[filename] = (header_hash, cython_hash, gen_file_hash)
103+
except (KeyError, ValueError, AttributeError, IOError):
106104
hashes = {}
107105
return hashes
108106

109107

110-
def save_hashes(hash_db, filename):
111-
with open(filename, 'w') as f:
112-
for key, value in hash_db.items():
113-
f.write("%s %s %s\n" % (key, value[0], value[1]))
108+
def save_hashes(hashes, filename):
109+
"""Save the hashes dict to the hashfile"""
110+
with open(filename, 'w') as cython_hash_file:
111+
for key, value in hashes.items():
112+
cython_hash_file.write("%s %s %s %s\n"
113+
% (key, value[0], value[1], value[2]))
114114

115115

116116
def sha1_of_file(filename):
@@ -120,59 +120,74 @@ def sha1_of_file(filename):
120120
return h.hexdigest()
121121

122122

123-
#
124-
# Main program
125-
#
126-
def normpath(path):
123+
def clean_path(path):
124+
"""Clean the path"""
127125
path = path.replace(os.sep, '/')
128126
if path.startswith('./'):
129127
path = path[2:]
130128
return path
131129

132130

133-
def get_hash(frompath, topath):
134-
from_hash = sha1_of_file(frompath)
135-
to_hash = sha1_of_file(topath) if os.path.exists(topath) else None
136-
return from_hash, to_hash
131+
def get_hash_tuple(header_path, cython_path, gen_file_path):
132+
"""Get the hashes from the given files"""
133+
134+
header_hash = (sha1_of_file(header_path)
135+
if os.path.exists(header_path) else 'NA')
136+
from_hash = sha1_of_file(cython_path)
137+
to_hash = (sha1_of_file(gen_file_path)
138+
if os.path.exists(gen_file_path) else 'NA')
139+
140+
return header_hash, from_hash, to_hash
141+
137142

143+
def cythonize_if_unchanged(path, cython_file, gen_file, hashes):
144+
full_cython_path = os.path.join(path, cython_file)
145+
full_header_path = full_cython_path.replace('.pyx', '.pxd')
146+
full_gen_file_path = os.path.join(path, gen_file)
138147

139-
def process(path, fromfile, tofile, processor_function, hash_db):
140-
fullfrompath = os.path.join(path, fromfile)
141-
fulltopath = os.path.join(path, tofile)
142-
current_hash = get_hash(fullfrompath, fulltopath)
143-
if current_hash == hash_db.get(normpath(fullfrompath)):
144-
print('%s has not changed' % fullfrompath)
148+
current_hash = get_hash_tuple(full_header_path, full_cython_path,
149+
full_gen_file_path)
150+
151+
if current_hash == hashes.get(clean_path(full_cython_path)):
152+
print('%s has not changed' % full_cython_path)
145153
return
146154

147-
print('Processing %s' % fullfrompath)
148-
processor_function(fullfrompath, fulltopath)
155+
print('Processing %s' % full_cython_path)
156+
cythonize(full_cython_path, full_gen_file_path)
157+
149158
# changed target file, recompute hash
150-
current_hash = get_hash(fullfrompath, fulltopath)
151-
# store hash in db
152-
hash_db[normpath(fullfrompath)] = current_hash
159+
current_hash = get_hash_tuple(full_header_path, full_cython_path,
160+
full_gen_file_path)
153161

162+
# Update the hashes dict with the new hash
163+
hashes[clean_path(full_cython_path)] = current_hash
154164

155-
def find_process_files(root_dir):
165+
166+
def check_and_cythonize(root_dir):
156167
print(root_dir)
157-
hash_db = load_hashes(HASH_FILE)
168+
hashes = load_hashes(HASH_FILE)
169+
158170
for cur_dir, dirs, files in os.walk(root_dir):
159171
for filename in files:
160-
for fromext, function in rules.items():
161-
if filename.endswith(fromext):
162-
toext = ".c"
163-
with open(os.path.join(cur_dir, filename), 'rb') as f:
164-
data = f.read()
165-
m = re.search(b"libcpp", data, re.I | re.M)
166-
if m:
167-
toext = ".cpp"
168-
fromfile = filename
169-
tofile = filename[:-len(fromext)] + toext
170-
process(cur_dir, fromfile, tofile, function, hash_db)
171-
save_hashes(hash_db, HASH_FILE)
172+
if filename.endswith('.pyx'):
173+
gen_file_ext = '.c'
174+
# Cython files with libcpp imports should be compiled to cpp
175+
with open(os.path.join(cur_dir, filename), 'rb') as f:
176+
data = f.read()
177+
m = re.search(b"libcpp", data, re.I | re.M)
178+
if m:
179+
gen_file_ext = ".cpp"
180+
cython_file = filename
181+
gen_file = filename.replace('.pyx', gen_file_ext)
182+
cythonize_if_unchanged(cur_dir, cython_file, gen_file, hashes)
183+
184+
# Save hashes once per module. This prevents cythonizing prev.
185+
# files again when debugging broken code in a single file
186+
save_hashes(hashes, HASH_FILE)
172187

173188

174189
def main(root_dir=DEFAULT_ROOT):
175-
find_process_files(root_dir)
190+
check_and_cythonize(root_dir)
176191

177192

178193
if __name__ == '__main__':

0 commit comments

Comments
 (0)
0