/*
 * PROJECT: Alcyone System Kernel
 * LICENSE: BSD Clause 3
 * PURPOSE: Spinning Ring Lock 
 * NT KERNEL: 5.11.9360
 * COPYRIGHT:  2023-2029 Dibymartanda Samanta <>
 */ 

 /*Alcyone's Spinning Ring Lock is a tweaked Mellor-Crummey and Scott Spin Lock algorithm that utilizes a Ring Buffer for 
   implementing a FIFO algorithm for lock acquisition and unlocking. The Ring Buffer queue is completely lock-free, utilizing a Hazard
   Pointer for accessing members of the structure.

   It has been designed for high contention scenarios to provide:

   - High degree of scalability 
   - Fairness 
   - Reduced cache coherency traffic
   - Reduced latency
   - Superior cache line utilization                                                                        

   Note: The Spinning Ring Lock should only be used when dealing with high contention scenarios  */

/* Spinning Ring Lock Data Type */
/* One entry in the lock's MCS wait queue.
   Both fields are written by one processor and polled by another
   (KeAcquireSpinningRingLock spins on Locked, KeReleaseSpinningRingLock
   spins on Next), so they are volatile to force a fresh load on every
   iteration of those loops. */
typedef struct _MCS_NODE {
    struct _MCS_NODE* volatile Next;  /* Successor in the queue; nullptr until a waiter links itself in */
    volatile LONG Locked;             /* Set to 1 by the waiter, cleared to 0 by the releaser at hand-off */
} MCS_NODE, *PMCS_NODE;

/* Number of MCS nodes embedded in every lock. Must be a power of two because
   ring indices are wrapped with "& (RING_BUFFER_SIZE - 1)". Defined ahead of
   SRING_LOCK because the structure uses it as an array bound. */
#define RING_BUFFER_SIZE 256

/* Spinning Ring Lock: an MCS queue lock whose queue nodes are taken from a
   ring buffer embedded in the lock itself. */
typedef struct _SRING_LOCK {
    volatile PMCS_NODE Tail;                /* Last node in the MCS wait queue; nullptr while the lock is free */
    MCS_NODE RingBuffer[RING_BUFFER_SIZE];  /* Embedded pool of queue nodes */
    volatile LONG64 Head;                   /* Ring index advanced each time a ring node is freed */
    volatile LONG64 RingTail;               /* Ring index of the next slot to hand out */
    volatile PMCS_NODE HazardPointer;       /* Most recently published node; one slot shared by all processors */
    volatile PMCS_NODE Owner;               /* Node of the current holder; written and read only by the holder */
} SRING_LOCK, *PSRING_LOCK;

/* In the future the size can be increased if needed, but a fallback to dynamic allocation is present.

Chain linking of ring buffers, like MCS nodes, is planned:

    -------------              -------------              -------------
    |  Buffer Ptr  |           |  Buffer Ptr  |           |  Buffer Ptr  |
    |  Next Node   |<--------  |  Next Node   |<--------  |  Next Node   | <--------  nullptr
     -------------              -------------              -------------
Dynamic allocation is, for now, a #hack to keep the lock going in case more threads than the ring
buffer has slots try to retrieve the lock at the same time. */


/* Internal function for MCS node allocation.

   Reserves the slot at Lock->RingTail by advancing RingTail with a
   compare-exchange. When advancing would make RingTail equal to Head the ring
   is treated as full and a node is taken from non-paged pool instead.

   Returns a node whose fields are NOT initialized, or nullptr when the ring is
   full and the pool allocation fails. Release the node with
   KiFreeRingLockNode. */
PMCS_NODE
KiAllocateRingLockNode(
    _Inout_ PSRING_LOCK Lock
)
{
    LONG64 OldTail, NewTail;
    PMCS_NODE Node;

    do {
        OldTail = Lock->RingTail;
        NewTail = (OldTail + 1) & (RING_BUFFER_SIZE - 1);

        if (NewTail == (Lock->Head & (RING_BUFFER_SIZE - 1)))
        {
            /* Ring buffer is full, fall back to dynamic allocation */
            return (PMCS_NODE)ExAllocatePoolWithTag(NonPagedPool, sizeof(MCS_NODE), 'MCSN');
        }
    } while (InterlockedCompareExchange64(&Lock->RingTail, NewTail, OldTail) != OldTail);

    /* The compare-exchange succeeded, so slot OldTail belongs to this caller */
    Node = &Lock->RingBuffer[OldTail];

    /* Publish the node in the lock's hazard pointer slot.
       NOTE(review): the slot is shared by every processor using this lock, so
       this store overwrites whatever another processor published. */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, Node);
    KeMemoryBarrier();

    return Node;
}

/* Internal function that returns a node obtained from KiAllocateRingLockNode.

   Nodes outside Lock->RingBuffer came from the pool fallback and are handed
   back to the pool. For ring nodes the hazard pointer is cleared if it still
   references the node, and Head is advanced by one slot.

   Head is advanced unconditionally: both callers clear the hazard pointer
   immediately before calling, so gating the advance on the hazard pointer
   would mean ring slots are never returned.

   NOTE(review): Head moves by one slot per free without looking at which slot
   was freed, so slot reuse assumes ring nodes are freed in the order they were
   handed out. Two processors racing between allocation and the Tail exchange
   in KeAcquireSpinningRingLock can enqueue in a different order; confirm this
   is acceptable or track per-slot state. */
VOID
KiFreeRingLockNode(
    _Inout_ PSRING_LOCK Lock,
    _In_ PMCS_NODE Node
)
{
    LONG64 OldHead, NewHead;

    if (Node < &Lock->RingBuffer[0] || Node >= &Lock->RingBuffer[RING_BUFFER_SIZE]) {
        /* Node was dynamically allocated, free the pool */
        ExFreePoolWithTag(Node, 'MCSN');
        return;
    }

    /* Clear the hazard pointer if it is still pointing to this node */
    InterlockedCompareExchangePointer((PVOID*)&Lock->HazardPointer, nullptr, Node);

    /* Read Head exactly once per attempt so the comparand and the value the
       result is checked against are the same snapshot */
    do {
        OldHead = Lock->Head;
        NewHead = (OldHead + 1) & (RING_BUFFER_SIZE - 1);
    } while (InterlockedCompareExchange64(&Lock->Head, NewHead, OldHead) != OldHead);
}

/* External Function */

/* Puts a lock into its initial, unowned state: empty ring buffer, empty wait
   queue, no holder and no published hazard pointer. Must be called before the
   lock is first acquired. */
VOID
KeInitializeSpinningRingLock(
    _Out_ PSRING_LOCK Lock
)
{
    RtlZeroMemory(Lock->RingBuffer, sizeof(Lock->RingBuffer));
    Lock->Head = 0;
    Lock->RingTail = 0;
    Lock->Tail = nullptr;
    Lock->Owner = nullptr;
    Lock->HazardPointer = nullptr;
}

/* Acquires the lock, spinning until it is granted.

   A queue node is obtained, appended to the wait queue by atomically swapping
   it into Lock->Tail, and, if there was a predecessor, linked behind it. The
   caller then spins on its own node's Locked field until the predecessor
   clears it. The node is remembered in Lock->Owner for the matching
   KeReleaseSpinningRingLock call. */
VOID
KeAcquireSpinningRingLock(
    _Inout_ PSRING_LOCK Lock
)
{
    PMCS_NODE Node;
    PMCS_NODE Predecessor;

    /* The pool fallback in KiAllocateRingLockNode can fail. This routine has
       no way to report failure, so wait for a node instead of dereferencing
       nullptr. */
    for (;;) {
        Node = KiAllocateRingLockNode(Lock);
        if (Node != nullptr) {
            break;
        }
        KeYieldProcessor();
    }

    Node->Next = nullptr;
    Node->Locked = 1;

    /* Set hazard pointer before we exchange */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, Node);
    KeMemoryBarrier();

    /* Become the new queue tail; the old tail (if any) is our predecessor */
    Predecessor = (PMCS_NODE)InterlockedExchangePointer((PVOID*)&Lock->Tail, Node);

    if (Predecessor != nullptr) {
        /* Set hazard pointer to predecessor */
        InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, Predecessor);
        KeMemoryBarrier();

        /* Link behind the predecessor. An interlocked store is used so the
           compiler cannot move it below the spin loop that depends on it. */
        InterlockedExchangePointer((PVOID*)&Predecessor->Next, Node);

        /* Volatile read: the predecessor clears Locked from another processor */
        while (*(volatile LONG*)&Node->Locked) {
            KeYieldProcessor();
        }
    }

    /* Clear hazard pointer (the interlocked operation also orders the
       critical section after the acquisition) */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, nullptr);

    /* We hold the lock now; record our node so release can find it */
    Lock->Owner = Node;
}

/* Releases a lock held through KeAcquireSpinningRingLock.

   Operates on the holder's own node (Lock->Owner), not on Lock->Tail: with
   waiters queued, Tail is the last waiter rather than the holder. If no
   successor exists the queue is reset to empty; otherwise the successor's
   Locked flag is cleared to hand the lock over. The holder's node is then
   freed. Calling this on a lock that is not held is a no-op. */
VOID
KeReleaseSpinningRingLock(
    _Inout_ PSRING_LOCK Lock
)
{
    PMCS_NODE Node = Lock->Owner;
    PMCS_NODE Successor;

    if (Node == nullptr) {
        /* Lock is not held; nothing to release */
        return;
    }

    /* Give up ownership before the hand-off so the next holder's store to
       Owner cannot be overwritten by us */
    Lock->Owner = nullptr;

    /* Set hazard pointer */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, Node);
    KeMemoryBarrier();

    Successor = *(PMCS_NODE volatile*)&Node->Next;

    if (Successor == nullptr) {
        /* No visible successor: try to mark the queue empty */
        if (InterlockedCompareExchangePointer((PVOID*)&Lock->Tail, nullptr, Node) == Node)
        {
            /* Clear hazard pointer before returning */
            InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, nullptr);
            KiFreeRingLockNode(Lock, Node);
            return;
        }

        /* A waiter has already swapped itself into Tail but has not linked
           behind us yet; wait for the link to appear */
        while ((Successor = *(PMCS_NODE volatile*)&Node->Next) == nullptr) {
            KeYieldProcessor();
        }
    }

    /* Set hazard pointer to next node */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, Successor);
    KeMemoryBarrier();

    /* Hand the lock over. Successor must not be touched after this store:
       its owner may release and free it at any time. */
    *(volatile LONG*)&Successor->Locked = 0;

    /* Clear hazard pointer */
    InterlockedExchangePointer((PVOID*)&Lock->HazardPointer, nullptr);

    KiFreeRingLockNode(Lock, Node);
}

/* Understanding the bit manipulation used to index the ring buffer:
   (Index + 1) & (RING_BUFFER_SIZE - 1)
   RING_BUFFER_SIZE is defined as 256, which is a power of 2. This is crucial for the operation to work.
   (RING_BUFFER_SIZE - 1) is 255, which in binary is "11111111".
   (Index + 1) advances a ring index (the head or the tail of the ring buffer) by one slot.
   The bitwise AND `&` with (RING_BUFFER_SIZE - 1) then acts like the % operator, wrapping 256 back to 0,
   but with much better performance. */