Google Interview Question for Senior Software Development Engineers


Country: United States





/* 
There is a straightforward solution that is about 5 lines:
- count the requests in a moving window of an interval
- I do this by using a queue and limiting its size. When a request arrives:
  1) I check if the oldest timestamp in the queue can leave the window
  2) I accept the request if queue size < allowed requests,
     and push its timestamp into the queue
*/

// returns true if the request can be processed, false if it should be rejected
bool processRequest() {
	auto now = steady_clock::now();
	if (!requests_.empty() && (now - requests_.front()) > interval_) requests_.pop();
	if (requests_.size() < requestLimit_) {
		requests_.push(now);
		return true;
	}
	return false;
}

/*	
  This is O(1) time and O(allowed-requests) space.

  There are issues and things to consider:
  1) bursts: say you allow 100 requests per minute, but in the first second 100
     requests arrive. How should the system behave? Maybe it's better to scale
     down to 1 request every 600 ms?

  2) synchronization: the structure needs to be secured against concurrent
     access. I would expect requests to be processed on multiple cores. The
     C++ std::queue is backed by a std::deque by default, so it needs
     protection with a mutex.

     That's not good in a high-throughput system, first because a mutex is
     expensive, second because of what happens while it is locked: the queue
     may touch the heap, which is expensive and unnecessary (long locking
     period, long wait, long runtime).

     Therefore the queue can be replaced with a fixed-size ring buffer.
     A lock-free version might be possible, but is tricky to get right.

  - I spared the injection/decorator/aspect/attribute discussion. If it needs
    to be fast, maybe use templates instead of runtime polymorphism in C++.

  - Testing is another interesting question. To do that properly I need to
    mock/inject the clock, so I'd design it to take a chrono interface or
    clock type.

  I put the basic implementation into a Monte Carlo simulation below.
*/

#include <chrono>
#include <cstdlib> // rand()
#include <vector>
#include <queue>
#include <thread>
#include <iostream>
#include <iomanip>

using namespace std;
using namespace std::chrono;

class RateLimitter 
{
private:
	queue<steady_clock::time_point> requests_;
	milliseconds interval_;
	unsigned int requestLimit_;

public:
	RateLimitter(milliseconds interval, unsigned int requestLimit)
		: interval_(interval), requestLimit_(requestLimit) {}
	
	bool processRequest() {
		auto now = steady_clock::now();
		if (!requests_.empty() && (now - requests_.front()) > interval_) requests_.pop();
		if (requests_.size() < requestLimit_) {
			requests_.push(now);
			return true;
		}
		return false;
	}
};


int main()
{
	RateLimitter r(milliseconds(1000), 10); // freq: 10/s 
	int produced = 0, processed = 0;
	auto start = system_clock::now();
	while (++produced < 1000) {
		this_thread::sleep_for(milliseconds(rand()%100)); // avg. freq: 20/s		
		if (r.processRequest()) {
			processed++;
			auto elapsed = system_clock::now() - start;
			if (processed % 10 == 0)
				cout << setprecision(3) <<
				"processed: " << processed << " "
				<< static_cast<double>(processed) / (duration_cast<seconds>(elapsed).count() + 1)
				<< " req./s \t produced " << produced << " "
				<< static_cast<double>(produced) / (duration_cast<seconds>(elapsed).count() + 1)
				<< " req./s" << endl;
		}
	}
}
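The fixed-size ring buffer mentioned in the notes above could be sketched as follows. This is a hypothetical variant (the class name `RingRateLimiter` is mine), keeping the same evict-at-most-one-per-call policy as the queue version, but with all storage preallocated so the hot path never touches the heap:

```cpp
#include <chrono>
#include <cstddef>
#include <vector>

using namespace std::chrono;

// Hypothetical ring-buffer variant of the queue-based limiter above.
// Capacity is fixed at requestLimit, so no allocation happens after
// construction (shorter critical sections if a mutex is added).
class RingRateLimiter {
	std::vector<steady_clock::time_point> slots_; // fixed-size ring
	std::size_t head_ = 0;   // index of the oldest timestamp
	std::size_t count_ = 0;  // number of timestamps in the window
	milliseconds interval_;

public:
	RingRateLimiter(milliseconds interval, std::size_t requestLimit)
		: slots_(requestLimit), interval_(interval) {}

	bool processRequest() {
		auto now = steady_clock::now();
		// Same policy as the queue version: evict at most one expired entry.
		if (count_ > 0 && now - slots_[head_] > interval_) {
			head_ = (head_ + 1) % slots_.size();
			--count_;
		}
		if (count_ < slots_.size()) {
			slots_[(head_ + count_) % slots_.size()] = now; // append at tail
			++count_;
			return true;
		}
		return false;
	}
};
```

Since the capacity equals the request limit, the ring can never overflow; head/count arithmetic replaces push/pop, and nothing is ever moved or copied.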

- Chris November 29, 2017 | Flag Reply

Here is a solution that uses a definable number of slots instead of tracking requestLimit items in the queue. The queue size never exceeds slotCt.

class RateLimitter 
{
private:
	queue<std::pair<long long, unsigned int>> requests_;
	long long intervalUs_;
	unsigned int requestLimit_;
	unsigned int slotCt_;
	long long slotLengthUs_;
	unsigned int reqsPerInterv_;

public:
	RateLimitter(long long intervalMs, unsigned int requestLimit, unsigned int slotCt = 100)
		: intervalUs_(intervalMs * 1000), requestLimit_(requestLimit), slotCt_(slotCt),
		  slotLengthUs_(intervalMs * 1000 / slotCt), reqsPerInterv_(0) {
		if (slotLengthUs_ == 0) throw invalid_argument("interval too short: intervalMs * 1000 must be >= slotCt");
	}
	
	bool processRequest() {
		long long nowUs = duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
		long long slotId = nowUs / slotLengthUs_;

		// is oldest slot expired
		if (!requests_.empty() && requests_.front().first <= slotId - slotCt_) {
			reqsPerInterv_ -= requests_.front().second; // all that were in this slot get freed
			requests_.pop(); // remove it
		}

		// is there "space"?
		if (reqsPerInterv_ < requestLimit_) {				
			reqsPerInterv_++;
			if (!requests_.empty() && requests_.back().first == slotId) {
				requests_.back().second++; // append to open slot
			} else {
				requests_.push({slotId, 1}); // create new slot with count 1
			}
			return true;
		}
		return false;
	}
};
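The slot-compaction idea can also be shown in a stripped-down, testable form. This is a minimal sketch (the names `SlotLimiter` and `processRequest(nowUs)` are mine): the caller passes in the current time in microseconds, which makes the class trivial to unit-test without mocking the clock, and the expiry loop frees every stale slot rather than at most one per call:

```cpp
#include <deque>
#include <utility>

// Hypothetical stripped-down version of the slot idea above: timestamps are
// bucketed into slotCt slots per interval, and the caller supplies the time.
class SlotLimiter {
	std::deque<std::pair<long long, unsigned>> slots_; // {slotId, count}
	long long slotLenUs_;
	unsigned slotCt_, limit_, inWindow_ = 0;

public:
	SlotLimiter(long long intervalUs, unsigned limit, unsigned slotCt)
		: slotLenUs_(intervalUs / slotCt), slotCt_(slotCt), limit_(limit) {}

	bool processRequest(long long nowUs) {
		long long slotId = nowUs / slotLenUs_;
		// Free every slot that has left the window (iterates, unlike the
		// original, so stale counts cannot linger after a long gap).
		while (!slots_.empty() && slots_.front().first <= slotId - slotCt_) {
			inWindow_ -= slots_.front().second;
			slots_.pop_front();
		}
		if (inWindow_ >= limit_) return false;
		++inWindow_;
		if (!slots_.empty() && slots_.back().first == slotId)
			++slots_.back().second; // append to the open slot
		else
			slots_.push_back({slotId, 1}); // open a new slot with count 1
		return true;
	}
};
```

Injecting the time this way also addresses the testability point from the first post: unit tests can drive the limiter with synthetic timestamps.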

- Chris November 29, 2017 | Flag Reply

Using a circular array and Decorator design pattern.

#include <iostream>
#include <vector>
#include <algorithm> // std::max
#include <ctime>     // time()
#include <cstdint>   // uint32_t
#include <unistd.h>  // sleep()

using namespace std;

class Server {
	public:
		virtual void ProcessRequest() = 0;
};

class MyServer : public Server {
	public:
		virtual void ProcessRequest()
		{
			cout << "request processed\n";
		}
};

class RateLimiter : public Server {
	public:
		RateLimiter(Server *server, int interval, int limit)
		{
			server_ = server;
			limit_ = limit;
			interval_ = interval;
			counts_.resize(interval, 0);
			start_time_ = 0;
			rate_ = 0;
		}
		virtual void ProcessRequest()
		{
			if (RequestAllowed()) {
				server_->ProcessRequest();
			} else {
				cout << "request discarded\n";
			}
		}

	private:
		bool RequestAllowed()
		{
			uint32_t current_time = time(NULL);
			start_time_ = max(start_time_, current_time - interval_ * 2);
			while (start_time_ <= current_time - interval_) {
				rate_ -= counts_[start_time_ % interval_];
				counts_[start_time_ % interval_] = 0;
				++start_time_;
			}

			if (rate_ >= limit_) {
				return false;
			} else {
				++counts_[current_time % interval_];
				++rate_;
				return true;
			}
		}

		int interval_, limit_, rate_;
		uint32_t start_time_;
		vector<int> counts_;
		Server *server_;
};

int main() {
	MyServer my_server;
	RateLimiter rate_limiter(&my_server, 60, 10);
	Server *server = &rate_limiter;
	while (true) {
		server->ProcessRequest();
		sleep(1);
	}
}

- Alex November 29, 2017 | Flag Reply

@ChrisK. It may be worth checking if the back() of the queue is too old, and if so, clearing the queue (maybe by replacing it with another instance). E.g. there are millions of requests within the interval, then no requests for a time greater than the interval. In this case, currently, the first request that comes after the gap will be iterating over the millions of requests sitting in the queue.
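A minimal sketch of that check, assuming the queue-based limiter above (the class name `RateLimiterWithReset` is mine): if even the newest timestamp has left the window, everything in the queue is stale and can be dropped in one step:

```cpp
#include <chrono>
#include <queue>

using namespace std::chrono;

// Hypothetical illustration of the suggestion above: before the normal
// bookkeeping, check whether the newest entry has left the window; if so,
// drop the whole queue at once instead of popping one entry per later call.
class RateLimiterWithReset {
	std::queue<steady_clock::time_point> requests_;
	milliseconds interval_;
	unsigned requestLimit_;

public:
	RateLimiterWithReset(milliseconds interval, unsigned requestLimit)
		: interval_(interval), requestLimit_(requestLimit) {}

	bool processRequest() {
		auto now = steady_clock::now();
		// Newest timestamp already outside the window: the whole queue is
		// stale, so replace it with an empty one (frees the memory, too).
		if (!requests_.empty() && now - requests_.back() > interval_)
			requests_ = {};
		if (!requests_.empty() && now - requests_.front() > interval_)
			requests_.pop();
		if (requests_.size() < requestLimit_) {
			requests_.push(now);
			return true;
		}
		return false;
	}
};
```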

- Alex November 29, 2017 | Flag Reply

@Alex, not sure I understood your question... Are you saying the queue is full with 1 million items because that's the limit per interval, then at least a whole interval passes with no requests, and now another burst arrives?

Then, when the first of these requests arrives, it will see the queue is full; it will only remove the oldest item and place itself at the back. The next one does the same, until an item arrives that can't remove from the front because the front is not old enough. That would work with 1 million items, if you want to spend the memory for it. If it's a ring buffer it's the same: nothing ever gets moved, copied, or iterated. I would question the 1 million per interval and whether it's worth spending the memory for that purpose, but it would work. If it's 1 million, I would definitely start thinking about compacting into sub-intervals and counting. The solution would still be O(1), but much more complicated.

I think that's the charm of the solution: it never needs to iterate, so it's always plain O(1), not amortized, which is neat for high throughput.

- Chris November 29, 2017 | Flag Reply

@ChrisK. Sorry, I just misunderstood it at first glance. It's cool, except for memory consumption. Each request takes a few bytes, and the memory never gets freed.

- Alex November 29, 2017 | Flag Reply

@Alex: No problem, but memory is max requests per interval, not the total number of requests. The queue never grows larger than max requests; note the "requests_.pop();" :-) The ring buffer would be of that size from the beginning, just like your solution...

- Chris November 29, 2017 | Flag Reply

@ChrisK. Yes, I see the queue size won't exceed the rate limit.
Regarding memory consumption, our solutions differ. The size of my ring buffer is not the rate limit; it's the interval. E.g. if the interval is 60 seconds and the limit is 1 million, the buffer will be 60 integers.

- Alex November 29, 2017 | Flag Reply

@Alex, that's right as well. I'd also create a histogram like that if I had to reduce space and couldn't shorten the interval.

- Chris November 29, 2017 | Flag Reply

We can use a semaphore (using the same queue concept) to give a specific number of threads access and keep the remaining ones waiting.

Java code -

import java.util.concurrent.Semaphore;

public class Process {

	Semaphore semaphore = new Semaphore(5);
	

	public static void main(String[] args) {
		Process example = new Process();
		example.work();
	}
	
	public void work(){
		Counter counter = new Counter();
		for(int i = 0; i <= 40; i++){
			Thread t = new Thread(new Runner("T"+i, semaphore, counter));
			t.start();
		}
	}
}

class Runner implements Runnable{
	String name;
	Semaphore semaphore;
	Counter counter;
	
	public Runner(String name, Semaphore semaphore, Counter counter) {
		super();
		this.name = name;
		this.semaphore = semaphore;
		this.counter = counter;
	}
	@Override
	public void run() {
		try {
			semaphore.acquire();
			counter.incrementCount();
			System.out.println("Running thread " + name + " current active count = " + counter.getCount() + " No. of threads on wait = " + semaphore.getQueueLength());
			Thread.sleep(2000);
		} catch (InterruptedException e) {
			e.printStackTrace();
		}finally{
			counter.decrementCount();
			semaphore.release();
		}
	}
}
class Counter{
	private static int count = 0;
    private static final Object countLock = new Object();

    public void incrementCount() {
        synchronized (countLock) {
            count++;
        }
    }
    public void decrementCount() {
        synchronized (countLock) {
            count--;
        }
    }
    public int getCount() {
        synchronized (countLock) {
            return count;
        }
    }
}

- sudip.innovates November 29, 2017 | Flag Reply

