General Coding Interview Questions (Not Algorithm)
Table of Contents
Numeric Integral
def trapezoidal(low, high, func, division = 100) -> float:
# Trapezoidal rule
grid = np.linspace(low, high, division)
h = (high - low) / division
f_values = func(grid)
_sum = ((f_values[0] + f_values[-1]) / 2 + np.sum(f_values[1:-1])) * h
return _sum
def f(x):
return 1/x
trapezoidal(1, np.e, f, division=1_000_000)
A problem is that, when the dimension of the function parameters become large, we have exponential larger numbers of values to evaluate. One way to reduce that is to use Monte Carlo Integration.
A very good overview is here. on wiki. On a one dimensional case, we are having $I=\frac{b-a}{N} \sum_{i=0}^N f\left(x_i\right)$.
More advanced technique can be used like stratification / change of measure. They most naïve case is to use uniforms on the sample space.
def MC_integration(low, high, func, N = 10_000):
x = np.random.uniform(low, high, size=N)
y = func(x)
integral = ((high - low) / N) * np.sum(y)
return integral
Breaking the stick - simulation
Consider the problem that you break the length 1 stick by a uniform, then choose the longer piece and break it again, derive the probability that the three pieces can be a triangle.
import numpy as np
def simulate_two_breaks_triangle(N):
# need two uniforms for the whole problem
u1 = np.random.uniform(size=(N))
u2 = np.random.uniform(size=(N))
# sort u and 1-u, the second column is the longer piece
first_split = np.vstack((u1, 1 - u1)).T
first_split.sort(axis = 1)
# stack the three pieces, using the second uniform vector to derive the second split
second_split = np.vstack(
(
first_split[:,0],
first_split[:,1] * u2,
first_split[:,1] * (1 - u2)
)
).T
# the only condition here is that no piece greater than 0.5
is_triangle = ((second_split < 0.5).sum(axis = 1) == 3)
avg = is_triangle.mean()
se = np.std(is_triangle) / np.sqrt(N)
return avg, se
Class that returns mean, max, and median
Create a class that does not use any external libraries, which returns the mean, max, and median given a series of numbers.
This is rather simple, except the median is a bit complex that you need to test whether odd / even number of elements here.
class Solution():
def __init__(self, nums: list):
self._n = len(nums)
self._max = max(nums)
self._sum = sum(nums)
self._mean = self._sum / self._n
# calculation of median
if self._n % 2 == 0:
median1 = nums[self._n//2]
median2 = nums[self._n//2 - 1]
self._median = (median1 + median2)/2
else:
self._median = nums[self._n//2]
def get_mean(self):
return self._mean
def get_max(self):
return self._max
def get_median(self):
return self._median
Drunk Passenger
Recall the classic drunk passenger problem and use a simulation program to derive the numerical result.
import numpy as np
def single_sim():
seats = [i for i in range(1, 101)]
for p in range(1, 101):
# the first passenger is drunk and take random seat right away
if p == 1:
take = np.random.choice(a = seats)
if take == 1:
return 1
elif take == 100:
return 0
else:
seats.remove(take)
else:
# a normal passenger will take his own seat first, else random choose
if p not in seats:
take = np.random.choice(a = seats)
if take == 1:
return 1
elif take == 100:
return 0
else:
seats.remove(take)
else:
seats.remove(p)
Test with the following,
N = 100_000
res = [single_sim() for i in range(N)]
np.mean(res)
np.std(res) /np.sqrt(N)
Contiguous Subarray with Max Product
Ok, this problem should be there, as this is a algo problem in dynamic programming from LeetCode 152 with a slight modification that we need to find the positions where the founded array starts and ends.
Additional for loop is written so that we start from where the array must end, which is the number that yields the max product. Then go backward and stop when the array starts, that is the recorded max product equal to the item itself.
class Solution:
def maxProduct(self, nums: list[int]) -> int:
n = len(nums)
dp_max = [nums[0]] * n
dp_min = [nums[0]] * n
s = 0
e = 0
for i in range(1, n):
dp_max[i] = max(dp_max[i-1] * nums[i], dp_min[i-1] * nums[i], nums[i])
dp_min[i] = min(dp_max[i-1] * nums[i], dp_min[i-1] * nums[i], nums[i])
if max(dp_max) > 0:
# find the contiguous subarray start and end
end = np.argmax(dp_max)
product = max(dp_max)
if product == nums[end]:
start = end
else:
for start in range(end - 1, -1, -1):
product = product / nums[start + 1]
if product == nums[start]:
break
else:
continue
print(start)
print(end)
return max(dp_max)
Limit Order Book
The implementation of this is frequently tested in a number of interviews. Well, this is actually a test of knowledge in system design, which is not fair.
There are so many identifiers for a quote, and it must be cumbersome to use a nested dictionary (any depth greater than 2 should be prohibited). So a new class is desired aggregate the information. And in a list we only fill in Quote objects.
from collections import defaultdict
from typing import Optional
class Quote:
def __init__(self, broker_name: str, px: float, qty: int):
self.broker_name = broker_name
self.px = px
self.qty = qty
class PriceChecker:
def __init__(self):
self._ask_book = defaultdict(list)
self._bid_book = defaultdict(list)
def observe_quote(self,
broker_name: str,
stock: str,
bid_px: float,
bid_qty: int,
ask_px: float,
ask_qty: int) -> None:
self._ask_book[stock].append(Quote(broker_name, ask_px, ask_qty))
# from low to high
self._ask_book[stock].sort(key=lambda x: x.px)
self._bid_book[stock].append(Quote(broker_name, bid_px, bid_qty))
# from high to low
self._bid_book[stock].sort(key=lambda x: x.px, reverse=True)
def calculate_avg_price_to_buy(self,
stock: str,
num_shares: int
) -> Optional[float]:
cost = 0
counter = 0
re_shares = num_shares
max_counter = len(self._ask_book[stock])
while re_shares > 0:
# if shares are enough; recheas the maximum quotes
try:
assert counter < max_counter
except AssertionError:
return cost/ (num_shares - re_shares)
# regular updates
realized_shares = min(self._ask_book[stock][counter].qty, re_shares)
cost += realized_shares * self._ask_book[stock][counter].px
re_shares -= realized_shares
counter += 1
return cost / num_shares
res = PriceChecker()
res.observe_quote('A', 'GS', 100, 200, 105, 200)
res.observe_quote('B', 'GS', 95, 200, 110, 200)
print(res.calculate_avg_price_to_buy('GS', 500))
Remove List Duplicates while Keeping Orders
If libraries are not available, then an “algorithm” solution is
seen = set()
[x for x in seq if x not in seen and not seen.add(x)]
Notice that seen.add() always returns a None, and not None will be True all the time. Therefore, an element in x will only added to the output the first time it is seen.
While, starting from Python 3.7, the default dictionary will retain the insertion orders.
items = [1, 2, 0, 1, 3, 2]
list(dict.fromkeys(items))
Formally, the fromkeys() should take two arguments, first is an iterable to generate keys, the second is the default values (None if ignore). Then the list function will turn keys into a list.
Number of Possible Rectangles
Suppose in a two dimensional space you have $n$ points, with integer coordinates. How many possible rectangles you can form?
We can use a $O(n^2)$ algorithm to check any pairs of the points and treat them as the diagonal. If two pair of points has their midpoints the same, and distance the same, then they can form a rectangle.
We use the default dict, and use the pair of mid_point, distance as the identifier(key) to check how many pairs of points share the same values. Then any two of them can form a rectangle.
import itertools
from collections import defaultdict
import numpy as np
from math import comb
def count_rectangles(points: AbstractSet[Tuple[int, int]]) -> int:
res = defaultdict(int)
# turn raw points into pairs and record their mid points and distance
for combo in itertools.combinations(points, 2):
point_1, point_2 = combo[0], combo[1]
distance = np.sqrt((point_1[0] - point_2[0])** 2 + (point_1[1] - point_2[1])**2)
mid_point = ((point_1[0] + point_2[0])/2, (point_1[1] + point_2[1])/2)
res[(mid_point, distance)] += 1
# add up the number of rectangles
counter = 0
for k in res:
if res[k] >= 2:
counter += comb(res[k], 2)
return counter
sample = set([(0,0), (0,1), (1,1), (1,0), (-1,0), (0, -1)])
print(count_rectangles(sample))