Build uploaded using `kernels`.
Browse files- build/torch210-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} +2 -2
- build/torch210-cxx11-cpu-x86_64-linux/_ops.py +3 -3
- build/torch210-cxx11-cpu-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch210-cxx11-cpu-x86_64-linux/metadata.json +1 -0
- build/torch210-cxx11-cu126-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
- build/torch210-cxx11-cu126-x86_64-linux/_ops.py +3 -3
- build/torch210-cxx11-cu126-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch210-cxx11-cu126-x86_64-linux/metadata.json +10 -2
- build/torch210-cxx11-cu128-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
- build/torch210-cxx11-cu128-x86_64-linux/_ops.py +3 -3
- build/torch210-cxx11-cu128-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch210-cxx11-cu128-x86_64-linux/metadata.json +12 -2
- build/torch210-cxx11-cu130-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
- build/torch210-cxx11-cu130-x86_64-linux/_ops.py +3 -3
- build/torch210-cxx11-cu130-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch210-cxx11-cu130-x86_64-linux/metadata.json +12 -2
- build/{torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_660971e.abi3.so → torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} +2 -2
- build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py +3 -3
- build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py +3 -5
- build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json +1 -0
- build/torch29-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} +2 -2
- build/torch29-cxx11-cpu-x86_64-linux/_ops.py +3 -3
- build/torch29-cxx11-cpu-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch29-cxx11-cpu-x86_64-linux/metadata.json +1 -0
- build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
- build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
- build/torch29-cxx11-cu126-x86_64-linux/_ops.py +3 -3
- build/torch29-cxx11-cu126-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch29-cxx11-cu126-x86_64-linux/metadata.json +10 -2
- build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
- build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
- build/torch29-cxx11-cu128-x86_64-linux/_ops.py +3 -3
- build/torch29-cxx11-cu128-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch29-cxx11-cu128-x86_64-linux/metadata.json +12 -2
- build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
- build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
- build/torch29-cxx11-cu130-x86_64-linux/_ops.py +3 -3
- build/torch29-cxx11-cu130-x86_64-linux/flash_attn_interface.py +6 -8
- build/torch29-cxx11-cu130-x86_64-linux/metadata.json +12 -2
- build/{torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_660971e.abi3.so → torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} +2 -2
- build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py +3 -3
- build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn_interface.py +3 -5
- build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json +1 -0
build/torch210-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45757a80c809dcbf1a8c75bde0c42dcba171960901f1c085699036d908a81c20
|
| 3 |
+
size 1942496
|
build/torch210-cxx11-cpu-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cpu_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cpu_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cpu_9f0ed09::{op_name}"
|
build/torch210-cxx11-cpu-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch210-cxx11-cpu-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
|
|
|
| 3 |
"python-depends": []
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": []
|
| 5 |
}
|
build/torch210-cxx11-cu126-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:039e336f68c3efaa02ef0b103a2607b2a544d68a46fa0165e2433464f26223a3
|
| 3 |
+
size 448709016
|
build/torch210-cxx11-cu126-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch210-cxx11-cu126-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch210-cxx11-cu126-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"8.0",
|
| 9 |
+
"9.0"
|
| 10 |
+
]
|
| 11 |
+
}
|
| 12 |
+
}
|
build/torch210-cxx11-cu128-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac175faf91cfb9cd9827985bae32380035cb9f880f0bbb702e7f045eee90ae0a
|
| 3 |
+
size 1037803408
|
build/torch210-cxx11-cu128-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch210-cxx11-cu128-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch210-cxx11-cu128-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"12.0",
|
| 10 |
+
"8.0",
|
| 11 |
+
"9.0"
|
| 12 |
+
]
|
| 13 |
+
}
|
| 14 |
+
}
|
build/torch210-cxx11-cu130-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e67e00b79ea3625b0ec32a083544d1808cce682dfe593a7212c525a292d1764f
|
| 3 |
+
size 1009055088
|
build/torch210-cxx11-cu130-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch210-cxx11-cu130-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch210-cxx11-cu130-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"12.0",
|
| 10 |
+
"8.0",
|
| 11 |
+
"9.0"
|
| 12 |
+
]
|
| 13 |
+
}
|
| 14 |
+
}
|
build/{torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_660971e.abi3.so → torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:192cfb14df060c1fd913c7a6e5a588da44dec07d0a9adf156ed377d860c8d3c2
|
| 3 |
+
size 15436096
|
build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_xpu_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_xpu_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_xpu_9f0ed09::{op_name}"
|
build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
|
|
|
| 3 |
"python-depends": []
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": []
|
| 5 |
}
|
build/torch29-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7313920d802d0477ca3d4144bc3b11e3c4761ea0e42b55a9b1b0b05567d23f71
|
| 3 |
+
size 1932200
|
build/torch29-cxx11-cpu-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cpu_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cpu_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cpu_9f0ed09::{op_name}"
|
build/torch29-cxx11-cpu-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch29-cxx11-cpu-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
|
|
|
| 3 |
"python-depends": []
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": []
|
| 5 |
}
|
build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_588b404.abi3.so
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:82a33a975de0a2c8e2440d596ecde21e4f3e1e8dcc9df42843e2045edb1e6d47
|
| 3 |
-
size 448648728
|
|
|
|
|
|
|
|
|
|
|
|
build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d422421456bf5ac34486ee898a7a6aaea7fff2edda3bce062d0283f69806275
|
| 3 |
+
size 448648752
|
build/torch29-cxx11-cu126-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch29-cxx11-cu126-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch29-cxx11-cu126-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,12 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"8.0",
|
| 9 |
+
"9.0"
|
| 10 |
+
]
|
| 11 |
+
}
|
| 12 |
+
}
|
build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_588b404.abi3.so
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:7b3afc2eda58b3649ac67513c775ce1cb124e5498f8dbbbe4ef07db6857d56d3
|
| 3 |
-
size 1037644608
|
|
|
|
|
|
|
|
|
|
|
|
build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29b22b8c1bbd77125b6c82aca5fecfe0416d2f116be7b1e1a4638f76fe542a2e
|
| 3 |
+
size 1037644632
|
build/torch29-cxx11-cu128-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch29-cxx11-cu128-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch29-cxx11-cu128-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"12.0",
|
| 10 |
+
"8.0",
|
| 11 |
+
"9.0"
|
| 12 |
+
]
|
| 13 |
+
}
|
| 14 |
+
}
|
build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_588b404.abi3.so
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:568eb670747b578b865649894b7674d9053a2ba660ba2e491030c788e3d5936a
|
| 3 |
-
size 1009019168
|
|
|
|
|
|
|
|
|
|
|
|
build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9e000e77b2d5f5b8554c1ba0e1edfc173bd19d904b95eede3f9cc7ecefbcf89
|
| 3 |
+
size 1009019192
|
build/torch29-cxx11-cu130-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_cuda_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_cuda_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_cuda_9f0ed09::{op_name}"
|
build/torch29-cxx11-cu130-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
|
|
| 1064 |
alibi_slopes,
|
| 1065 |
deterministic,
|
| 1066 |
return_attn_probs,
|
| 1067 |
-
|
| 1068 |
)
|
| 1069 |
|
| 1070 |
|
|
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
|
|
| 1142 |
alibi_slopes,
|
| 1143 |
deterministic,
|
| 1144 |
return_attn_probs,
|
| 1145 |
-
|
| 1146 |
)
|
| 1147 |
|
| 1148 |
|
|
@@ -1219,7 +1217,7 @@ def flash_attn_func(
|
|
| 1219 |
alibi_slopes,
|
| 1220 |
deterministic,
|
| 1221 |
return_attn_probs,
|
| 1222 |
-
|
| 1223 |
)
|
| 1224 |
|
| 1225 |
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1062 |
alibi_slopes,
|
| 1063 |
deterministic,
|
| 1064 |
return_attn_probs,
|
| 1065 |
+
torch.is_grad_enabled(),
|
| 1066 |
)
|
| 1067 |
|
| 1068 |
|
|
|
|
| 1140 |
alibi_slopes,
|
| 1141 |
deterministic,
|
| 1142 |
return_attn_probs,
|
| 1143 |
+
torch.is_grad_enabled(),
|
| 1144 |
)
|
| 1145 |
|
| 1146 |
|
|
|
|
| 1217 |
alibi_slopes,
|
| 1218 |
deterministic,
|
| 1219 |
return_attn_probs,
|
| 1220 |
+
torch.is_grad_enabled(),
|
| 1221 |
)
|
| 1222 |
|
| 1223 |
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch29-cxx11-cu130-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
-
"
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"12.0",
|
| 10 |
+
"8.0",
|
| 11 |
+
"9.0"
|
| 12 |
+
]
|
| 13 |
+
}
|
| 14 |
+
}
|
build/{torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_660971e.abi3.so → torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:262841040bd11d2ea11f317107fdc9484d864db0377b423eb9007f4c8a7eb74f
|
| 3 |
+
size 13923672
|
build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _flash_attn2_xpu_9f0ed09
|
| 3 |
+
ops = torch.ops._flash_attn2_xpu_9f0ed09
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_flash_attn2_xpu_9f0ed09::{op_name}"
|
build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn_interface.py
CHANGED
|
@@ -31,8 +31,6 @@ def _get_device():
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
| 34 |
-
_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False # TODO remove hasattr check when bwd is supported on XPU
|
| 35 |
-
|
| 36 |
|
| 37 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 38 |
# This should match the block sizes in the CUDA kernel
|
|
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
|
|
| 1285 |
alibi_slopes,
|
| 1286 |
deterministic,
|
| 1287 |
return_attn_probs,
|
| 1288 |
-
|
| 1289 |
)
|
| 1290 |
|
| 1291 |
|
|
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
|
|
| 1377 |
alibi_slopes,
|
| 1378 |
deterministic,
|
| 1379 |
return_attn_probs,
|
| 1380 |
-
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
|
|
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
|
|
| 1471 |
deterministic,
|
| 1472 |
return_attn_probs,
|
| 1473 |
block_table,
|
| 1474 |
-
False if
|
| 1475 |
)
|
| 1476 |
|
| 1477 |
|
|
|
|
| 31 |
else:
|
| 32 |
return "cpu"
|
| 33 |
|
|
|
|
|
|
|
| 34 |
|
| 35 |
def _get_block_size_n(device, head_dim, is_dropout, is_causal):
|
| 36 |
# This should match the block sizes in the CUDA kernel
|
|
|
|
| 1283 |
alibi_slopes,
|
| 1284 |
deterministic,
|
| 1285 |
return_attn_probs,
|
| 1286 |
+
torch.is_grad_enabled(),
|
| 1287 |
)
|
| 1288 |
|
| 1289 |
|
|
|
|
| 1375 |
alibi_slopes,
|
| 1376 |
deterministic,
|
| 1377 |
return_attn_probs,
|
| 1378 |
+
torch.is_grad_enabled(),
|
| 1379 |
)
|
| 1380 |
|
| 1381 |
|
|
|
|
| 1469 |
deterministic,
|
| 1470 |
return_attn_probs,
|
| 1471 |
block_table,
|
| 1472 |
+
False if q.device.type == "cpu" else torch.is_grad_enabled(),
|
| 1473 |
)
|
| 1474 |
|
| 1475 |
|
build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
|
|
|
| 3 |
"python-depends": []
|
| 4 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"version": 1,
|
| 3 |
+
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": []
|
| 5 |
}
|