Stock Clustering Example
In [2]:
Copied!
import cvxpy as cp
import numpy as np
import pandas as pd
import time
import cvxpy as cp
import numpy as np
import pandas as pd
import time
In [14]:
Copied!
filename = 'cluster_30_10.csv'
firstline = pd.read_csv(filename,nrows=1,header=None)
N = int(firstline.values[0][0]) ; K = int(firstline.values[0][1])
df = pd.read_csv(filename,skiprows=[0],header=None)
RHO = df.values
K = 10
filename = 'cluster_30_10.csv'
firstline = pd.read_csv(filename,nrows=1,header=None)
N = int(firstline.values[0][0]) ; K = int(firstline.values[0][1])
df = pd.read_csv(filename,skiprows=[0],header=None)
RHO = df.values
K = 10
Stock Clustering¶
We implement the problem we defined in the previous section as below. Two approaches,
- Mixed integer programming
- Lagrangian Relaxation
In [17]:
Copied!
x = cp.Variable((N,N),boolean=True)
y = cp.Variable(N,boolean=True)
obj = cp.Maximize(cp.sum(cp.multiply(RHO,x)))
constraints = [cp.sum(y) == K]
constraints += [
cp.sum(x[i,:]) == 1 for i in range(N)
]
constraints += [
x[i,:] <= y for i in range(N)
]
prob = cp.Problem(obj,constraints)
stime = time.time()
prob.solve(solver=cp.GUROBI)
print('Optimal value found = ',np.sum(RHO*x.value))
print('Elapsed time = ',time.time()-stime)
x = cp.Variable((N,N),boolean=True)
y = cp.Variable(N,boolean=True)
obj = cp.Maximize(cp.sum(cp.multiply(RHO,x)))
constraints = [cp.sum(y) == K]
constraints += [
cp.sum(x[i,:]) == 1 for i in range(N)
]
constraints += [
x[i,:] <= y for i in range(N)
]
prob = cp.Problem(obj,constraints)
stime = time.time()
prob.solve(solver=cp.GUROBI)
print('Optimal value found = ',np.sum(RHO*x.value))
print('Elapsed time = ',time.time()-stime)
Optimal value found = 21800.0 Elapsed time = 0.05299806594848633
In [31]:
Copied!
print("IS Representative in each cluster: ", y.value)
print("IS Representative in each cluster: ", y.value)
IS Representative in each cluster: [ 1. 0. 0. -0. -0. -0. 0. 0. 0. -0. 0. 1. -0. 1. 0. 1. 1. 1. 0. -0. 1. 0. -0. 1. 1. -0. -0. 0. -0. 1.]
Now we import an external function to use the Lagrangian relaxation to approach this problem
In [19]:
Copied!
from Cluster import *
u, besty, bestx, bestsol = SGalgo(RHO,K,verbose=False)
from Cluster import *
u, besty, bestx, bestsol = SGalgo(RHO,K,verbose=False)
Iteration 23: Best Lagrangian = 21800.558549391273, Best solution = 21800.0 Optimal value found = 21800.0
In [28]:
Copied!
print("IS Representative in each cluster: ", besty)
print("IS Representative in each cluster: ", besty)
IS Representative in each cluster: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.]
Knapsack Problem¶
In [5]:
Copied!
# values for items
v = np.array([1, 2, 3])
# weights
w = np.array([4, 5, 1])
# total weight constraint
W = 4
n = len(v)
x = cp.Variable(n, boolean=True)
# maximize the total values while not exceeding the constraint
prob = cp.Problem(cp.Maximize(v.T@x), [w.T@x <= W])
prob.solve()
# show the decision
x.value
# values for items
v = np.array([1, 2, 3])
# weights
w = np.array([4, 5, 1])
# total weight constraint
W = 4
n = len(v)
x = cp.Variable(n, boolean=True)
# maximize the total values while not exceeding the constraint
prob = cp.Problem(cp.Maximize(v.T@x), [w.T@x <= W])
prob.solve()
# show the decision
x.value
Out[5]:
array([0., 0., 1.])