Kernels
Commit 645c13c (verified) · committed by danieldk (HF Staff) · 1 parent: 02f588e

Build uploaded using `kernels`.
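
The build can be consumed directly from the Hub. A minimal sketch, assuming the `kernels` Python library and a hypothetical repo id `kernels-community/flash-attn2` (the repo id and the exported function's exact signature are assumptions, not taken from this commit):

import torch
from kernels import get_kernel

# get_kernel resolves the build variant matching the local torch/ABI/backend
# from the repo's build/ directory and loads the shared library.
flash_attn2 = get_kernel("kernels-community/flash-attn2")  # hypothetical repo id

q = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float16)
k = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float16)
v = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float16)

# flash_attn_func is one of the wrappers defined in flash_attn_interface.py below.
out = flash_attn2.flash_attn_func(q, k, v, causal=True)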

Files changed (43)
  1. build/torch210-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} +2 -2
  2. build/torch210-cxx11-cpu-x86_64-linux/_ops.py +3 -3
  3. build/torch210-cxx11-cpu-x86_64-linux/flash_attn_interface.py +6 -8
  4. build/torch210-cxx11-cpu-x86_64-linux/metadata.json +1 -0
  5. build/torch210-cxx11-cu126-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
  6. build/torch210-cxx11-cu126-x86_64-linux/_ops.py +3 -3
  7. build/torch210-cxx11-cu126-x86_64-linux/flash_attn_interface.py +6 -8
  8. build/torch210-cxx11-cu126-x86_64-linux/metadata.json +10 -2
  9. build/torch210-cxx11-cu128-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
  10. build/torch210-cxx11-cu128-x86_64-linux/_ops.py +3 -3
  11. build/torch210-cxx11-cu128-x86_64-linux/flash_attn_interface.py +6 -8
  12. build/torch210-cxx11-cu128-x86_64-linux/metadata.json +12 -2
  13. build/torch210-cxx11-cu130-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} +2 -2
  14. build/torch210-cxx11-cu130-x86_64-linux/_ops.py +3 -3
  15. build/torch210-cxx11-cu130-x86_64-linux/flash_attn_interface.py +6 -8
  16. build/torch210-cxx11-cu130-x86_64-linux/metadata.json +12 -2
  17. build/{torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_660971e.abi3.so → torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} +2 -2
  18. build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py +3 -3
  19. build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py +3 -5
  20. build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json +1 -0
  21. build/torch29-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} +2 -2
  22. build/torch29-cxx11-cpu-x86_64-linux/_ops.py +3 -3
  23. build/torch29-cxx11-cpu-x86_64-linux/flash_attn_interface.py +6 -8
  24. build/torch29-cxx11-cpu-x86_64-linux/metadata.json +1 -0
  25. build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
  26. build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
  27. build/torch29-cxx11-cu126-x86_64-linux/_ops.py +3 -3
  28. build/torch29-cxx11-cu126-x86_64-linux/flash_attn_interface.py +6 -8
  29. build/torch29-cxx11-cu126-x86_64-linux/metadata.json +10 -2
  30. build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
  31. build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
  32. build/torch29-cxx11-cu128-x86_64-linux/_ops.py +3 -3
  33. build/torch29-cxx11-cu128-x86_64-linux/flash_attn_interface.py +6 -8
  34. build/torch29-cxx11-cu128-x86_64-linux/metadata.json +12 -2
  35. build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_588b404.abi3.so +0 -3
  36. build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so +3 -0
  37. build/torch29-cxx11-cu130-x86_64-linux/_ops.py +3 -3
  38. build/torch29-cxx11-cu130-x86_64-linux/flash_attn_interface.py +6 -8
  39. build/torch29-cxx11-cu130-x86_64-linux/metadata.json +12 -2
  40. build/{torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_660971e.abi3.so → torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} +2 -2
  41. build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py +3 -3
  42. build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn_interface.py +3 -5
  43. build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json +1 -0
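
Each build directory name encodes the variant it targets: torch version, C++ ABI, compute backend (cpu, cuNNN for CUDA, xpuNNNNN for Intel XPU), CPU architecture, and OS. A sketch of decoding the scheme, inferred purely from the paths above (the pattern is not an official spec of the layout):

import re

# Pattern inferred from the directory names in this commit.
PATTERN = re.compile(
    r"torch(?P<torch>\d+)-(?P<abi>cxx\d+)-(?P<backend>cpu|cu\d+|xpu\d+)"
    r"-(?P<arch>[^-]+)-(?P<os>.+)"
)

m = PATTERN.match("torch210-cxx11-cu126-x86_64-linux")
assert m is not None
print(m.groupdict())
# {'torch': '210', 'abi': 'cxx11', 'backend': 'cu126', 'arch': 'x86_64', 'os': 'linux'}
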
build/torch210-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d90d30dbcf574c7a50f2c9774884370e71e1e177062c6a233fcc7e1940fffcb
-size 249504
+oid sha256:45757a80c809dcbf1a8c75bde0c42dcba171960901f1c085699036d908a81c20
+size 1942496
build/torch210-cxx11-cpu-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cpu_9f0ed09
+ops = torch.ops._flash_attn2_cpu_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cpu_9f0ed09::{op_name}"
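
This shim exists because each build registers its custom ops under a namespace carrying the backend and build hash (here `_flash_attn2_cpu_9f0ed09`), so different builds can be loaded side by side without op-name collisions; renaming the shared library therefore also means regenerating `_ops.py`. A sketch of how such a shim is typically used, assuming it sits inside the kernel package (`mha_fwd` is a placeholder op name, not necessarily one this kernel exports):

import torch
from ._ops import ops, add_op_namespace_prefix

# Call a kernel op through the per-build namespace.
# out = ops.mha_fwd(q, k, v)  # placeholder op name

# The prefix helper yields the fully qualified "namespace::op" string that
# torch.library APIs expect, e.g. when registering a fake/meta kernel:
@torch.library.register_fake(add_op_namespace_prefix("mha_fwd"))
def _mha_fwd_fake(q, k, v):
    return q.new_empty(q.shape)
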
build/torch210-cxx11-cpu-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
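
This diff repeats across all CPU and CUDA variants: the module-level `_XPU_AVAILABLE` check, which disabled gradients whenever an XPU was merely present, is removed, and the actual `torch.is_grad_enabled()` state is passed through; only the CPU path of `flash_attn_varlen_func` still forces the flag to False. A sketch of what that final `is_grad` argument typically gates inside an autograd wrapper (the class below is illustrative, not the exact code in this file, and uses SDPA as a stand-in for the compiled kernel):

import torch
import torch.nn.functional as F

class _SketchAttnFunc(torch.autograd.Function):
    @staticmethod
    def forward(ctx, q, k, v, is_grad):
        # SDPA stands in for the flash-attn forward kernel.
        out = F.scaled_dot_product_attention(q, k, v)
        ctx.is_grad = is_grad
        if is_grad:
            # Save activations only when a backward pass can happen;
            # with is_grad=False the call stays inference-only and cheaper.
            ctx.save_for_backward(q, k, v, out)
        return out

    @staticmethod
    def backward(ctx, dout):
        q, k, v, out = ctx.saved_tensors
        # The real kernel computes dq, dk, dv here; omitted in this sketch.
        raise NotImplementedError("backward sketch only")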
 
build/torch210-cxx11-cpu-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,5 @@
 {
   "version": 1,
+  "license": "BSD-3-Clause",
   "python-depends": []
 }
build/torch210-cxx11-cu126-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:247ade2063814573447dcb697fd39e738bcf5f0f5d40ac87eaf6cf6dba29298f
-size 448708992
+oid sha256:039e336f68c3efaa02ef0b103a2607b2a544d68a46fa0165e2433464f26223a3
+size 448709016
build/torch210-cxx11-cu126-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch210-cxx11-cu126-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch210-cxx11-cu126-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,12 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "8.0",
+      "9.0"
+    ]
+  }
+}
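
The CUDA builds now also record which compute capabilities the shared library was compiled for (8.0 covers A100-class GPUs, 9.0 H100-class). A sketch of checking the local GPU against that list (the file path is illustrative, and whether loaders actually consult this field is an assumption):

import json
import torch

with open("build/torch210-cxx11-cu126-x86_64-linux/metadata.json") as f:
    meta = json.load(f)

major, minor = torch.cuda.get_device_capability()
cap = f"{major}.{minor}"  # e.g. "8.0" on A100, "9.0" on H100
print(cap in meta.get("backend", {}).get("archs", []))
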
build/torch210-cxx11-cu128-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09cfe096dc8f0010e99225d44263e4d9172d4b542d48d656b3b9fd718ca55b7d
-size 1037803376
+oid sha256:ac175faf91cfb9cd9827985bae32380035cb9f880f0bbb702e7f045eee90ae0a
+size 1037803408
build/torch210-cxx11-cu128-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch210-cxx11-cu128-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch210-cxx11-cu128-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,14 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "12.0",
+      "8.0",
+      "9.0"
+    ]
+  }
+}
build/torch210-cxx11-cu130-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cuda_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:196d3756a7d099f5e23ddd53ebc47aadf558a96e1d7873f5a14faec09bb7b707
-size 1009055064
+oid sha256:e67e00b79ea3625b0ec32a083544d1808cce682dfe593a7212c525a292d1764f
+size 1009055088
build/torch210-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch210-cxx11-cu130-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch210-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,14 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "12.0",
+      "8.0",
+      "9.0"
+    ]
+  }
+}
build/{torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_660971e.abi3.so → torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d2c3b732c7af702975d491dd792485a9dc9a68f41d8f6cdd9fca64045dd27b6
-size 13907792
+oid sha256:192cfb14df060c1fd913c7a6e5a588da44dec07d0a9adf156ed377d860c8d3c2
+size 15436096
build/torch210-cxx11-xpu20253-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_660971e
-ops = torch.ops._flash_attn2_660971e
+from . import _flash_attn2_xpu_9f0ed09
+ops = torch.ops._flash_attn2_xpu_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_660971e::{op_name}"
+    return f"_flash_attn2_xpu_9f0ed09::{op_name}"
build/torch210-cxx11-xpu20253-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch210-cxx11-xpu20253-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,5 @@
 {
   "version": 1,
+  "license": "BSD-3-Clause",
   "python-depends": []
 }
build/torch29-cxx11-cpu-x86_64-linux/{_flash_attn2_588b404.abi3.so → _flash_attn2_cpu_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfab2b6517d74cf640d3da51f60a787dc42fa98774e10bf52d0a265cc5423f53
-size 239416
+oid sha256:7313920d802d0477ca3d4144bc3b11e3c4761ea0e42b55a9b1b0b05567d23f71
+size 1932200
build/torch29-cxx11-cpu-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cpu_9f0ed09
+ops = torch.ops._flash_attn2_cpu_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cpu_9f0ed09::{op_name}"
build/torch29-cxx11-cpu-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch29-cxx11-cpu-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,5 @@
 {
   "version": 1,
+  "license": "BSD-3-Clause",
   "python-depends": []
 }
build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_588b404.abi3.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:82a33a975de0a2c8e2440d596ecde21e4f3e1e8dcc9df42843e2045edb1e6d47
-size 448648728
build/torch29-cxx11-cu126-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d422421456bf5ac34486ee898a7a6aaea7fff2edda3bce062d0283f69806275
+size 448648752
build/torch29-cxx11-cu126-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch29-cxx11-cu126-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch29-cxx11-cu126-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,12 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "8.0",
+      "9.0"
+    ]
+  }
+}
build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_588b404.abi3.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7b3afc2eda58b3649ac67513c775ce1cb124e5498f8dbbbe4ef07db6857d56d3
-size 1037644608
build/torch29-cxx11-cu128-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b22b8c1bbd77125b6c82aca5fecfe0416d2f116be7b1e1a4638f76fe542a2e
+size 1037644632
build/torch29-cxx11-cu128-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch29-cxx11-cu128-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch29-cxx11-cu128-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,14 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "12.0",
+      "8.0",
+      "9.0"
+    ]
+  }
+}
build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_588b404.abi3.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:568eb670747b578b865649894b7674d9053a2ba660ba2e491030c788e3d5936a
-size 1009019168
build/torch29-cxx11-cu130-x86_64-linux/_flash_attn2_cuda_9f0ed09.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9e000e77b2d5f5b8554c1ba0e1edfc173bd19d904b95eede3f9cc7ecefbcf89
+size 1009019192
build/torch29-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_588b404
-ops = torch.ops._flash_attn2_588b404
+from . import _flash_attn2_cuda_9f0ed09
+ops = torch.ops._flash_attn2_cuda_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_588b404::{op_name}"
+    return f"_flash_attn2_cuda_9f0ed09::{op_name}"
build/torch29-cxx11-cu130-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1064,7 +1062,7 @@ def flash_attn_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1142,7 +1140,7 @@ def flash_attn_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1219,7 +1217,7 @@ def flash_attn_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
     )
build/torch29-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,14 @@
 {
   "version": 1,
-  "python-depends": []
-}
+  "license": "BSD-3-Clause",
+  "python-depends": [],
+  "backend": {
+    "type": "cuda",
+    "archs": [
+      "10.0",
+      "12.0",
+      "8.0",
+      "9.0"
+    ]
+  }
+}
build/{torch210-cxx11-xpu20253-x86_64-linux/_flash_attn2_660971e.abi3.so → torch29-cxx11-xpu20252-x86_64-linux/_flash_attn2_xpu_9f0ed09.abi3.so} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29c0a6792979ae3880bd1cf1077d710b14fbd96b244665c75e4e7aaf65449a49
-size 15424416
+oid sha256:262841040bd11d2ea11f317107fdc9484d864db0377b423eb9007f4c8a7eb74f
+size 13923672
build/torch29-cxx11-xpu20252-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
 import torch
-from . import _flash_attn2_660971e
-ops = torch.ops._flash_attn2_660971e
+from . import _flash_attn2_xpu_9f0ed09
+ops = torch.ops._flash_attn2_xpu_9f0ed09
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flash_attn2_660971e::{op_name}"
+    return f"_flash_attn2_xpu_9f0ed09::{op_name}"
build/torch29-cxx11-xpu20252-x86_64-linux/flash_attn_interface.py CHANGED
@@ -31,8 +31,6 @@ def _get_device():
     else:
         return "cpu"
 
-_XPU_AVAILABLE = torch.xpu.is_available() if hasattr(torch, "xpu") else False  # TODO remove hasattr check when bwd is supported on XPU
-
 
 def _get_block_size_n(device, head_dim, is_dropout, is_causal):
     # This should match the block sizes in the CUDA kernel
@@ -1285,7 +1283,7 @@ def flash_attn_varlen_qkvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1377,7 +1375,7 @@ def flash_attn_varlen_kvpacked_func(
         alibi_slopes,
         deterministic,
         return_attn_probs,
-        False if _XPU_AVAILABLE else torch.is_grad_enabled(),
+        torch.is_grad_enabled(),
     )
@@ -1471,7 +1469,7 @@ def flash_attn_varlen_func(
         deterministic,
         return_attn_probs,
         block_table,
-        False if _XPU_AVAILABLE or q.device.type == "cpu" else torch.is_grad_enabled(),
+        False if q.device.type == "cpu" else torch.is_grad_enabled(),
    )
build/torch29-cxx11-xpu20252-x86_64-linux/metadata.json CHANGED
@@ -1,4 +1,5 @@
 {
   "version": 1,
+  "license": "BSD-3-Clause",
   "python-depends": []
 }