mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2026-04-23 00:17:25 +08:00
8d27a523e7
* [feat] support attention_store kv cache backend * [fix] fix codestyle * [chore] optimize log * [fix] fix write storage task * [fix] fix read storage * [fix] fix code conflict after merge develop * [fix] fix cache bytes and read task token ids * [chore] add model for cache transfer manager * [chore] add some log * [chore] remove launched_cache_manager_signal * [fix] fix write_back_storage_task match_block_num condition * [fix] fix swap_cost_time * [ci] fix ci * Update fastdeploy/engine/sched/resource_manager_v1.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update fastdeploy/cache_manager/cache_transfer_manager.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update fastdeploy/cache_manager/transfer_factory/mooncake_store/attention_store.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
105 lines
2.7 KiB
Python
105 lines
2.7 KiB
Python
"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Any, List, Optional
|
|
|
|
import paddle
|
|
|
|
from fastdeploy.utils import get_logger
|
|
|
|
logger = get_logger("cache_storage", "cache_storage.log")
|
|
|
|
|
|
class KVCacheStorage(ABC):
    """
    Abstract key-value interface for storing and retrieving KV cache.

    Concrete storage backends subclass this and implement every method
    below; the class itself cannot be instantiated because all of its
    methods are abstract.
    """

    @abstractmethod
    def get(
        self,
        key: str,
        target_location: Optional[Any] = None,
        target_size: Optional[Any] = None,
    ) -> Optional[paddle.Tensor]:
        """
        Retrieve the value associated with the given key.

        Args:
            key: Key to look up.
            target_location: Optional, backend-defined.
                NOTE(review): presumably the destination buffer/address the
                value is read into -- confirm against the implementations.
            target_size: Optional, backend-defined.
                NOTE(review): presumably the size of ``target_location`` --
                confirm against the implementations.

        Returns:
            The stored tensor, or None if the key does not exist.
        """

    @abstractmethod
    def batch_get(
        self,
        keys: List[str],
        target_locations: Optional[Any] = None,
        target_sizes: Optional[Any] = None,
    ) -> List[Optional[paddle.Tensor]]:
        """
        Retrieve values for multiple keys.

        Args:
            keys: Keys to look up.
            target_locations: Optional, backend-defined.
                NOTE(review): presumably one destination per key -- confirm.
            target_sizes: Optional, backend-defined.
                NOTE(review): presumably one size per key -- confirm.

        Returns:
            A list holding, for each key, a tensor or None.
        """

    @abstractmethod
    def set(
        self,
        key: str,
        target_location: Optional[Any] = None,
        target_size: Optional[Any] = None,
    ) -> bool:
        """
        Store the value associated with the given key.

        NOTE(review): the signature carries no explicit value argument;
        presumably ``target_location`` / ``target_size`` describe the buffer
        whose contents are stored -- confirm against the implementations.

        Args:
            key: Key to store under.
            target_location: Optional, backend-defined.
            target_size: Optional, backend-defined.

        Returns:
            True if the operation was successful, False otherwise.
        """

    @abstractmethod
    def batch_set(
        self,
        keys: List[str],
        target_locations: Optional[Any] = None,
        target_sizes: Optional[Any] = None,
    ) -> bool:
        """
        Store multiple key-value pairs.

        NOTE(review): as with ``set``, no explicit values are passed;
        presumably ``target_locations`` / ``target_sizes`` describe the
        source buffers -- confirm against the implementations.

        Args:
            keys: Keys to store under.
            target_locations: Optional, backend-defined.
            target_sizes: Optional, backend-defined.

        Returns:
            True if all operations were successful, False otherwise.
        """

    @abstractmethod
    def exists(self, keys: List[str]) -> bool:
        """
        Check whether the given keys exist in the storage.

        NOTE(review): the parameter is a list of keys but the declared
        return type is a single bool; whether True means "all keys exist"
        or "any key exists" is not specified here -- confirm against the
        implementations and callers.

        Args:
            keys: Keys to check.

        Returns:
            True if the keys exist, False otherwise.
        """

    @abstractmethod
    def clear(self) -> bool:
        """
        Clear all keys in storage.

        Returns:
            A bool. NOTE(review): presumably True on success -- confirm.
        """

    @abstractmethod
    def query(self) -> int:
        """
        Query the number of blocks stored in the storage.

        Returns:
            The number of stored blocks.
        """
|