1
+ """
2
+ This module provides a regression test for results of running the readability
3
+ algorithm on a variety of different real-world examples. For each page in the
4
+ test suite, a benchmark was captured that represents the current readability
5
+ results. Note that these are not necessarily ideal results, just the ones used
6
+ as a benchmark.
7
+
8
+ This allows you to tweak and change the readability algorithm and see how it
9
+ changes existing results, hopefully for the better.
10
+
11
+ """
1
12
import lxml .html
2
13
import lxml .html .diff
3
14
import os
25
36
26
37
class ReadabilityTest:
    """Plain record describing one readability regression test case.

    The constructor only stores its arguments; it performs no I/O and no
    validation.
    """

    def __init__(
            self, dir_path, enabled, name, desc, notes, orig_path, rdbl_path
            ):
        """Store the values that describe a single test.

        Args:
            dir_path: Directory the test belongs to.
            enabled: Flag saying whether the test is enabled.
            name: Name of the test.
            desc: Description of the test.
            notes: Notes attached to the test; shown in the 'Notes' column
                of the summary table.
            orig_path: Path of the test's original document.
            rdbl_path: Path of the test's benchmark (readable) document.
        """
        self.dir_path = dir_path
        self.enabled = enabled
        self.name = name
        self.desc = desc
        self.notes = notes
        self.orig_path = orig_path
        self.rdbl_path = rdbl_path
35
49
@@ -63,11 +77,16 @@ def make_readability_test(dir_path, name, spec_dict):
63
77
enabled = spec_dict ['enabled' ]
64
78
else :
65
79
enabled = True
80
+ if 'notes' in spec_dict :
81
+ notes = spec_dict ['notes' ]
82
+ else :
83
+ notes = ''
66
84
return ReadabilityTest (
67
85
dir_path ,
68
86
enabled ,
69
87
name ,
70
88
spec_dict ['test_description' ],
89
+ notes ,
71
90
make_path (dir_path , name , ORIGINAL_SUFFIX ),
72
91
make_path (dir_path , name , READABLE_SUFFIX )
73
92
)
@@ -99,8 +118,6 @@ def execute_test(test_data):
99
118
else :
100
119
doc = readability .Document (test_data .orig_html )
101
120
summary = doc .summary ()
102
- benchmark_doc = (test_data .rdbl_html , 'benchmark' )
103
- result_doc = (summary .html , 'result' )
104
121
diff = lxml .html .diff .htmldiff (test_data .rdbl_html , summary .html )
105
122
return ReadabilityTestResult (test_data , summary .html , diff )
106
123
@@ -139,12 +156,14 @@ class ResultSummary():
139
156
140
157
def __init__(self, result):
    """Summarise the insertions and deletions found in a test result's diff.

    Parses ``result.diff_html`` as an HTML fragment and records, for the
    ``<ins>`` and ``<del>`` elements it contains:

    * ``insertions`` / ``deletions``: the sum of the values returned by
      ``element_string_lengths`` for those elements;
    * ``insertion_blocks`` / ``deletion_blocks``: how many such elements
      were found.

    Args:
        result: Object exposing the diff markup as ``diff_html``.
    """
    doc = lxml.html.fragment_fromstring(result.diff_html)

    # '//ins' and '//del' select matching elements anywhere in the document.
    insertions = doc.xpath('//ins')
    self.insertions = sum(element_string_lengths(insertions))
    self.insertion_blocks = len(insertions)

    deletions = doc.xpath('//del')
    self.deletions = sum(element_string_lengths(deletions))
    self.deletion_blocks = len(deletions)
@@ -169,15 +188,17 @@ def output(suffix):
169
188
B .A ('result' , href = output (RESULT_SUFFIX )),
170
189
' ' ,
171
190
B .A ('diff' , href = output (DIFF_SUFFIX ))
172
- )
191
+ ),
192
+ B .TD (test .notes )
173
193
)
174
194
else :
175
195
return B .TR (
176
196
B .CLASS ('skipped' ),
177
197
B .TD ('%s (SKIPPED)' % test .name ),
178
198
B .TD ('N/A' ),
179
199
B .TD ('N/A' ),
180
- B .TD ('N/A' )
200
+ B .TD ('N/A' ),
201
+ B .TD (test .notes )
181
202
)
182
203
183
204
@@ -187,7 +208,8 @@ def make_summary_doc(tests_w_results):
187
208
B .TH ('Test Name' ),
188
209
B .TH ('Inserted (in # of blocks)' ),
189
210
B .TH ('Deleted (in # of blocks)' ),
190
- B .TH ('Links' )
211
+ B .TH ('Links' ),
212
+ B .TH ('Notes' )
191
213
)
192
214
)
193
215
for (test , result ) in tests_w_results :
@@ -253,7 +275,7 @@ def write_summary(path, tests_w_results):
253
275
254
276
def add_css(doc):
    """Insert a ``<head>`` with the charset declaration and CSS into *doc*.

    The head is inserted as the first child of *doc*. The character set is
    declared with a ``<meta http-equiv="Content-Type">`` element: a
    ``content`` attribute placed directly on ``<head>`` is not a charset
    declaration.

    Args:
        doc: Element that receives the new ``<head>`` as its first child.
    """
    # NOTE(review): assumes B is lxml.html.builder (which provides META) --
    # confirm against the module's imports.
    # 'http-equiv' is not a valid keyword name, so the attributes are passed
    # as a dict.
    charset = B.META(
        {'http-equiv': 'Content-Type', 'content': 'text/html; charset=utf-8'}
    )
    style = B.STYLE(CSS, type='text/css')
    # The charset declaration goes first so it precedes the stylesheet text.
    head = B.HEAD(charset, style)
    doc.insert(0, head)
258
280
259
281
0 commit comments