Stock Clustering Example

In [2]:

Copied!





import cvxpy as cp
import numpy as np
import pandas as pd
import time
import cvxpy as cp
import numpy as np
import pandas as pd
import time

In [14]:

Copied!





filename = 'cluster_30_10.csv'
firstline = pd.read_csv(filename,nrows=1,header=None)
N = int(firstline.values[0][0]) ; K = int(firstline.values[0][1])
df = pd.read_csv(filename,skiprows=[0],header=None)
RHO = df.values
K = 10
filename = 'cluster_30_10.csv'
firstline = pd.read_csv(filename,nrows=1,header=None)
N = int(firstline.values[0][0]) ; K = int(firstline.values[0][1])
df = pd.read_csv(filename,skiprows=[0],header=None)
RHO = df.values
K = 10

Stock Clustering¶

We implement the problem we defined in the previous section as below. Two approaches,

Mixed integer programming
Lagrangian Relaxation

In [17]:

Copied!





x = cp.Variable((N,N),boolean=True)
y = cp.Variable(N,boolean=True)
obj = cp.Maximize(cp.sum(cp.multiply(RHO,x)))
constraints = [cp.sum(y) == K]
constraints += [
    cp.sum(x[i,:]) == 1 for i in range(N)
]
constraints += [
    x[i,:] <= y for i in range(N)
]
prob = cp.Problem(obj,constraints)
stime = time.time()
prob.solve(solver=cp.GUROBI)
print('Optimal value found = ',np.sum(RHO*x.value))
print('Elapsed time = ',time.time()-stime)
x = cp.Variable((N,N),boolean=True)
y = cp.Variable(N,boolean=True)
obj = cp.Maximize(cp.sum(cp.multiply(RHO,x)))
constraints = [cp.sum(y) == K]
constraints += [
    cp.sum(x[i,:]) == 1 for i in range(N)
]
constraints += [
    x[i,:] <= y for i in range(N)
]
prob = cp.Problem(obj,constraints)
stime = time.time()
prob.solve(solver=cp.GUROBI)
print('Optimal value found = ',np.sum(RHO*x.value))
print('Elapsed time = ',time.time()-stime)

Optimal value found =  21800.0
Elapsed time =  0.05299806594848633

In [31]:

Copied!

print("IS Representative in each cluster: ", y.value)
print("IS Representative in each cluster: ", y.value)

IS Representative in each cluster:  [ 1.  0.  0. -0. -0. -0.  0.  0.  0. -0.  0.  1. -0.  1.  0.  1.  1.  1.
  0. -0.  1.  0. -0.  1.  1. -0. -0.  0. -0.  1.]

Now we import an external function to use the Lagrangian relaxation to approach this problem

In [19]:

Copied!

from Cluster import *
u, besty, bestx, bestsol = SGalgo(RHO,K,verbose=False)
from Cluster import *
u, besty, bestx, bestsol = SGalgo(RHO,K,verbose=False)

Iteration 23: Best Lagrangian = 21800.558549391273, Best solution = 21800.0
Optimal value found = 21800.0

In [28]:

Copied!

print("IS Representative in each cluster: ", besty)
print("IS Representative in each cluster: ", besty)

IS Representative in each cluster:  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 1.
 1. 0. 0. 0. 0. 1.]

Knapsack Problem¶

In [5]:

Copied!





# values for items
v = np.array([1, 2, 3])
# weights
w = np.array([4, 5, 1])
# total weight constraint
W = 4

n = len(v)
x = cp.Variable(n, boolean=True)
# maximize the total values while not exceeding the constraint
prob = cp.Problem(cp.Maximize(v.T@x), [w.T@x <= W])
prob.solve()
# show the decision
x.value
# values for items
v = np.array([1, 2, 3])
# weights
w = np.array([4, 5, 1])
# total weight constraint
W = 4

n = len(v)
x = cp.Variable(n, boolean=True)
# maximize the total values while not exceeding the constraint
prob = cp.Problem(cp.Maximize(v.T@x), [w.T@x <= W])
prob.solve()
# show the decision
x.value

Out[5]:

array([0., 0., 1.])