[tests] remove `test_sdpa_equivalence` (redundant) (#37911) · huggingface/transformers@40a493c · GitHub

Commit 40a493c

gante and ydshieh authored
[tests] remove test_sdpa_equivalence (redundant) (#37911)
* rm test_sdpa_equivalence
* make fixup

---------
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
1 parent ea29f61 commit 40a493c
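For context: the removed `test_sdpa_equivalence` (preserved in full in the gemma and nemotron diffs below) loads the same checkpoint twice, once with `attn_implementation="eager"` and once with `attn_implementation="sdpa"`, runs one forward pass with each, and asserts the final hidden states agree within a loose tolerance. (`make fixup` in the commit message runs the repo's style and consistency checks over the modified files.) A minimal standalone sketch of that pattern, using `AutoModelForCausalLM` as a stand-in for the per-model classes and a placeholder `checkpoint_dir` in place of the `TemporaryDirectory` the tests used:

import torch
from transformers import AutoModelForCausalLM

# "path/to/checkpoint" is a placeholder; the removed tests saved a freshly
# initialized model into a TemporaryDirectory and reloaded it twice.
checkpoint_dir = "path/to/checkpoint"

model_eager = AutoModelForCausalLM.from_pretrained(checkpoint_dir, attn_implementation="eager")
model_sdpa = AutoModelForCausalLM.from_pretrained(checkpoint_dir, attn_implementation="sdpa")

# One forward pass per attention implementation on the same dummy input.
# The removed tests ran in float16 on an accelerator (@require_torch_accelerator).
dummy_input = torch.randint(0, model_eager.config.vocab_size, (1, 8))
outputs_eager = model_eager(dummy_input, output_hidden_states=True)
outputs_sdpa = model_sdpa(dummy_input, output_hidden_states=True)

# Compare the last hidden states within a loose tolerance: eager and SDPA
# attention are numerically close but not bit-identical (the removed tests
# used atol=3e-3 for Gemma and atol=1e-2 for Nemotron).
assert torch.allclose(
    outputs_sdpa.hidden_states[-1], outputs_eager.hidden_states[-1], atol=3e-3
)

Per the commit title the check is redundant, presumably because equivalent eager-vs-SDPA comparisons already run in the shared model test suite; removing it also lets the per-model `@unittest.skip` overrides in the other four files go.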

File tree

tests/models/aya_vision/test_modeling_aya_vision.py
tests/models/cohere2/test_modeling_cohere2.py
tests/models/deepseek_v3/test_modeling_deepseek_v3.py
tests/models/gemma/test_modeling_gemma.py
tests/models/gemma2/test_modeling_gemma2.py
tests/models/nemotron/test_modeling_nemotron.py

6 files changed, 0 additions and 81 deletions

tests/models/aya_vision/test_modeling_aya_vision.py

Lines changed: 0 additions & 4 deletions
@@ -297,10 +297,6 @@ def test_generate_continue_from_inputs_embeds(self):
     def test_multi_gpu_data_parallel_forward(self):
         pass
 
-    @unittest.skip("Cohere2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip(reason="SiglipVisionModel does not support standalone training")
     def test_training(self):
         pass

tests/models/cohere2/test_modeling_cohere2.py

Lines changed: 0 additions & 4 deletions
@@ -127,10 +127,6 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
     def test_generate_continue_from_inputs_embeds(self):
         pass
 
-    @unittest.skip("Cohere2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
 
 @slow
 @require_read_token

tests/models/deepseek_v3/test_modeling_deepseek_v3.py

Lines changed: 0 additions & 4 deletions
@@ -300,10 +300,6 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
     def test_generate_continue_from_inputs_embeds(self):
         pass
 
-    @unittest.skip("DeepseekV3's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip("Deepseek-V3 uses MLA so it is not compatible with the standard cache format")
     def test_beam_search_generate_dict_outputs_use_cache(self):
         pass

tests/models/gemma/test_modeling_gemma.py

Lines changed: 0 additions & 32 deletions
@@ -303,38 +303,6 @@ def test_Gemma_token_classification_model(self):
     def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Gemma flash attention does not support right padding")
 
-    @require_torch_sdpa
-    @require_torch_accelerator
-    @slow
-    def test_sdpa_equivalence(self):
-        for model_class in self.all_model_classes:
-            if not model_class._supports_sdpa:
-                self.skipTest(reason="Model does not support SDPA")
-
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            model = model_class(config)
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname)
-                model_sdpa = model_class.from_pretrained(
-                    tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa"
-                )
-                model_sdpa.to(torch_device)
-
-                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="eager")
-                model.to(torch_device)
-
-                dummy_input = inputs_dict[model_class.main_input_name]
-                dummy_input = dummy_input.to(torch_device)
-                outputs = model(dummy_input, output_hidden_states=True)
-                outputs_sdpa = model_sdpa(dummy_input, output_hidden_states=True)
-
-                logits = outputs.hidden_states[-1]
-                logits_sdpa = outputs_sdpa.hidden_states[-1]
-
-                # gemma sdpa needs a high tolerance
-                assert torch.allclose(logits_sdpa, logits, atol=3e-3)
-
     @require_flash_attn
     @require_torch_gpu
     @pytest.mark.flash_attn_test

tests/models/gemma2/test_modeling_gemma2.py

Lines changed: 0 additions & 4 deletions
@@ -143,10 +143,6 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
     def test_generate_continue_from_inputs_embeds(self):
         pass
 
-    @unittest.skip("Gemma2's eager attn/sdpa attn outputs are expected to be different")
-    def test_sdpa_equivalence(self):
-        pass
-
     @unittest.skip(
         reason="HybridCache can't be gathered because it is not iterable. Adding a simple iter and dumping `distributed_iterator`"
         " as in Dynamic Cache doesn't work. NOTE: @gante all cache objects would need better compatibility with multi gpu setting"

tests/models/nemotron/test_modeling_nemotron.py

Lines changed: 0 additions & 33 deletions
@@ -28,7 +28,6 @@
     require_torch,
     require_torch_accelerator,
     require_torch_gpu,
-    require_torch_sdpa,
     slow,
     torch_device,
 )
@@ -102,38 +101,6 @@ def setUp(self):
     def test_model_outputs_equivalence(self, **kwargs):
         pass
 
-    @require_torch_sdpa
-    @require_torch_accelerator
-    @slow
-    def test_sdpa_equivalence(self):
-        for model_class in self.all_model_classes:
-            if not model_class._supports_sdpa:
-                self.skipTest(reason="Model does not support SDPA")
-
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            model = model_class(config)
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname)
-                model_sdpa = model_class.from_pretrained(
-                    tmpdirname, torch_dtype=torch.float16, attn_implementation="sdpa"
-                )
-                model_sdpa.to(torch_device)
-
-                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.float16, attn_implementation="eager")
-                model.to(torch_device)
-
-                dummy_input = inputs_dict[model_class.main_input_name]
-                dummy_input = dummy_input.to(torch_device)
-                outputs = model(dummy_input, output_hidden_states=True)
-                outputs_sdpa = model_sdpa(dummy_input, output_hidden_states=True)
-
-                logits = outputs.hidden_states[-1]
-                logits_sdpa = outputs_sdpa.hidden_states[-1]
-
-                # nemotron sdpa needs a high tolerance
-                assert torch.allclose(logits_sdpa, logits, atol=1e-2)
-
     @require_flash_attn
     @require_torch_gpu
     @pytest.mark.flash_attn_test

0 commit comments