Source code for zict.lru

from __future__ import annotations

from collections.abc import (
    Callable,
    Collection,
    ItemsView,
    Iterator,
    KeysView,
    MutableMapping,
    ValuesView,
)

from zict.common import KT, VT, NoDefault, ZictBase, close, flush, locked, nodefault
from zict.utils import InsertionSortedSet


class LRU(ZictBase[KT, VT]):
    """Evict Least Recently Used Elements.

    Parameters
    ----------
    n: int or float
        Number of elements to keep, or total weight if ``weight`` is used.
        Any individual key that is heavier than n will be automatically evicted as
        soon as it is inserted.

        It can be updated after initialization. See also: ``offset`` attribute.
    d: MutableMapping
        Dict-like in which to hold elements. There are no expectations on its
        internal ordering. Iteration on the LRU follows the order of the underlying
        mapping.
    on_evict: callable or list of callables
        Function:: k, v -> action to call on key/value pairs prior to eviction

        If an exception occurs during an on_evict callback (e.g. a callback tried
        storing to disk and raised a disk full error) the key will remain in the LRU.
    on_cancel_evict: callable or list of callables
        Function:: k, v -> action to call on key/value pairs if they're deleted or
        updated from a thread while the on_evict callables are being executed in
        another.

        If you're not accessing the LRU from multiple threads, ignore this parameter.
    weight: callable
        Function:: k, v -> number to determine the size of keeping the item in the
        mapping.

        Defaults to ``(k, v) -> 1``

    Notes
    -----
    If you call methods of this class from multiple threads, access will be fast as
    long as all methods of ``d`` are fast. Callbacks are not protected by locks and
    can be arbitrarily slow.

    Examples
    --------
    >>> lru = LRU(2, {}, on_evict=lambda k, v: print("Lost", k, v))
    >>> lru['x'] = 1
    >>> lru['y'] = 2
    >>> lru['z'] = 3
    Lost x 1
    """

    d: MutableMapping[KT, VT]
    order: InsertionSortedSet[KT]
    heavy: InsertionSortedSet[KT]
    on_evict: list[Callable[[KT, VT], None]]
    on_cancel_evict: list[Callable[[KT, VT], None]]
    weight: Callable[[KT, VT], float]
    #: Maximum weight before eviction is triggered, as set during initialization.
    #: Updating this attribute doesn't trigger eviction by itself; you should call
    #: :meth:`evict_until_below_target` explicitly afterwards.
    n: float
    #: Offset to add to ``total_weight`` to determine if key/value pairs should be
    #: evicted. It always starts at zero and can be updated afterwards. Updating this
    #: attribute doesn't trigger eviction by itself; you should call
    #: :meth:`evict_until_below_target` explicitly afterwards.
    #: Increasing ``offset`` is not the same as reducing ``n``, as the latter will also
    #: reduce the threshold below which a value is considered "heavy" and qualifies for
    #: immediate eviction.
    offset: float
    weights: dict[KT, float]
    closed: bool
    total_weight: float
    _cancel_evict: dict[KT, bool]

    def __init__(
        self,
        n: float,
        d: MutableMapping[KT, VT],
        *,
        on_evict: Callable[[KT, VT], None]
        | list[Callable[[KT, VT], None]]
        | None = None,
        on_cancel_evict: Callable[[KT, VT], None]
        | list[Callable[[KT, VT], None]]
        | None = None,
        weight: Callable[[KT, VT], float] = lambda k, v: 1,
    ):
        super().__init__()
        self.d = d
        self.n = n
        self.offset = 0

        if callable(on_evict):
            on_evict = [on_evict]
        self.on_evict = on_evict or []
        if callable(on_cancel_evict):
            on_cancel_evict = [on_cancel_evict]
        self.on_cancel_evict = on_cancel_evict or []

        self.weight = weight
        self.weights = {k: weight(k, v) for k, v in d.items()}
        self.total_weight = sum(self.weights.values())
        self.order = InsertionSortedSet(d)
        self.heavy = InsertionSortedSet(k for k, v in self.weights.items() if v >= n)
        self.closed = False
        self._cancel_evict = {}

    @locked
    def __getitem__(self, key: KT) -> VT:
        result = self.d[key]
        self.order.remove(key)
        self.order.add(key)
        return result

    @locked
    def get_all_or_nothing(self, keys: Collection[KT]) -> dict[KT, VT]:
        """If all keys exist in the LRU, update their FIFO priority and return their
        values; this would be the same as ``{k: lru[k] for k in keys}``.

        If any keys are missing, however, raise KeyError for the first one missing
        and do not bring any of the available keys to the top of the LRU.
        """
        result = {key: self.d[key] for key in keys}
        for key in keys:
            self.order.remove(key)
            self.order.add(key)
        return result
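
    # Illustrative sketch (not part of the class): look up several keys atomically,
    # refreshing their recency only if all of them are present (key names below are
    # hypothetical):
    #
    #     try:
    #         values = lru.get_all_or_nothing(["x", "y"])
    #     except KeyError:
    #         ...  # at least one key was missing; recency was not updated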

    def __setitem__(self, key: KT, value: VT) -> None:
        self.set_noevict(key, value)
        try:
            self.evict_until_below_target()
        except Exception:
            if self.weights.get(key, 0) > self.n and key not in self.heavy:
                # weight(value) > n and evicting the key we just inserted failed.
                # Evict the rest of the LRU instead.
                try:
                    while len(self.d) > 1:
                        self.evict()
                except Exception:
                    pass
            raise

    @locked
    def set_noevict(self, key: KT, value: VT) -> None:
        """Variant of ``__setitem__`` that does not evict if the total weight exceeds n.

        Unlike ``__setitem__``, this method does not depend on the ``on_evict``
        functions to be thread-safe for its own thread-safety. It also is not prone to
        re-raising exceptions from the ``on_evict`` callbacks.
        """
        self.discard(key)
        weight = self.weight(key, value)
        if key in self._cancel_evict:
            self._cancel_evict[key] = True
        self.d[key] = value
        self.order.add(key)
        if weight > self.n:
            self.heavy.add(key)  # Mark this key to be evicted first
        self.weights[key] = weight
        self.total_weight += weight
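
    # Illustrative sketch (not part of the class): insert a batch of items without
    # evicting after every key, then evict once at the end, which is effectively what
    # ``__setitem__`` does per key (``batch`` is a hypothetical mapping):
    #
    #     for k, v in batch.items():
    #         lru.set_noevict(k, v)
    #     lru.evict_until_below_target()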

    def evict_until_below_target(self, n: float | None = None) -> None:
        """Evict key/value pairs until the total weight falls below n

        Parameters
        ----------
        n: float, optional
            Total weight threshold to achieve. Defaults to self.n.
        """
        if n is None:
            n = self.n
        while self.total_weight + self.offset > n and not self.closed:
            try:
                self.evict()
            except KeyError:
                return  # Multithreaded race condition
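
    # Illustrative sketch (not part of the class): per the ``n`` and ``offset``
    # attribute notes above, changing either attribute does not evict anything by
    # itself; call this method explicitly afterwards (``extra_weight`` is
    # hypothetical):
    #
    #     lru.offset += extra_weight      # reserve headroom for data tracked elsewhere
    #     lru.evict_until_below_target()  # evict until total_weight + offset <= n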

    @locked
    def evict(
        self, key: KT | NoDefault = nodefault
    ) -> tuple[KT, VT, float] | tuple[None, None, float]:
        """Evict the least recently used key, or the least recently inserted key with
        individual weight > n, if any. You may also evict a specific key.

        This is typically called internally, but it can also be triggered externally.

        Returns
        -------
        Tuple of (key, value, weight)

        Or (None, None, 0) if the key that was being evicted was updated or deleted
        from another thread while the on_evict callbacks were being executed. This
        outcome is only possible in multithreaded access.
        """
        if key is nodefault:
            try:
                key = next(iter(self.heavy or self.order))
            except StopIteration:
                raise KeyError("evict(): dictionary is empty")

        if key in self._cancel_evict:
            return None, None, 0

        # For the purpose of multithreaded access, it's important that the value
        # remains in self.d until all callbacks are successful.
        # When this is used inside a Buffer, there must never be a moment when the key
        # is neither in fast nor in slow.
        value = self.d[key]

        # If we are evicting a heavy key we just inserted and one of the callbacks
        # fails, put it at the bottom of the LRU instead of the top. This way lighter
        # keys will have a chance to be evicted first and make space.
        self.heavy.discard(key)

        self._cancel_evict[key] = False
        try:
            with self.unlock():
                # This may raise; e.g. if a callback tries storing to a full disk
                for cb in self.on_evict:
                    cb(key, value)
            if self._cancel_evict[key]:
                for cb in self.on_cancel_evict:
                    cb(key, value)
                return None, None, 0
        finally:
            del self._cancel_evict[key]

        del self.d[key]
        self.order.remove(key)
        weight = self.weights.pop(key)
        self.total_weight -= weight
        return key, value, weight
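
    # Illustrative sketch (not part of the class): eviction can also be driven by
    # hand, e.g. to spill one element at a time and inspect what was dropped:
    #
    #     key, value, weight = lru.evict()  # raises KeyError if the LRU is empty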

    @locked
    def __delitem__(self, key: KT) -> None:
        if key in self._cancel_evict:
            self._cancel_evict[key] = True
        del self.d[key]
        self.order.remove(key)
        self.heavy.discard(key)
        self.total_weight -= self.weights.pop(key)

    def keys(self) -> KeysView[KT]:
        return self.d.keys()

    def values(self) -> ValuesView[VT]:
        return self.d.values()

    def items(self) -> ItemsView[KT, VT]:
        return self.d.items()

    def __len__(self) -> int:
        return len(self.d)

    def __iter__(self) -> Iterator[KT]:
        return iter(self.d)

    def __contains__(self, key: object) -> bool:
        return key in self.d

    def __str__(self) -> str:
        sub = str(self.d) if not isinstance(self.d, dict) else "dict"
        return f"<LRU: {self.total_weight + self.offset}/{self.n} on {sub}>"

    __repr__ = __str__

    def flush(self) -> None:
        flush(self.d)

    def close(self) -> None:
        self.closed = True
        close(self.d)
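

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of zict.lru): a minimal example, assuming
# byte-string values with a ``len``-based weight and an on_evict callback, to
# show the eviction behaviour described in the class docstring. All names and
# sizes below are hypothetical.

if __name__ == "__main__":
    evicted = []
    lru = LRU(
        100,
        {},
        on_evict=lambda k, v: evicted.append(k),
        weight=lambda k, v: len(v),
    )
    lru["a"] = b"x" * 60
    lru["b"] = b"y" * 60      # total weight 120 > 100: evicts "a", the least recently used key
    lru["huge"] = b"z" * 150  # heavier than n: evicted as soon as it is inserted
    assert evicted == ["a", "huge"]
    assert set(lru) == {"b"}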