This page is for an old version of Hexaly Optimizer.
We recommend that you update your version and read the documentation for the
latest stable release.
K-means¶
Principles learned¶
- Define the actual set of decision variables
- Define the float bounds from the data
- Use of non-linearities: minimizing a sum of “min”
Problem¶
Given a sample of n observations and a set of quantitative variables, the goal is to partition these observations into k clusters. Clusters are defined by their center of gravity. Each observation belongs to the cluster with the nearest center of gravity. For more details, see Wikipedia.
Download the exampleData¶
The format of the data is as follows:
- 1st line: number of observations and number of variables
- For each observation: coordinate along each variable, and the actual cluster it belongs to
Program¶
The only decision variables are x[c][v]
to define the coordinate of the
center of gravity of cluster c along variable v. There is no need for variables
to determine which cluster an obervation belongs to: the distance with the
closest cluster is computed thanks to the min operator. The objective is to
minimize the sum of these distances.
- Execution:
- localsolver kmeans.lsp inFileName=instances/irit.dat [lsTimeLimit=] [solFileName=] [k=val]
/********** kmeans.lsp **********/
use io;
/* Reads instance data */
function input(){
usage = "\nUsage: localsolver kmeans.lsp "
+ "inFileName=inputFile [solFileName=outputFile] [lsTimeLimit=timeLimit] [k=value]\n";
if (inFileName == nil) throw usage;
local f = io.openRead(inFileName);
nbObservations = f.readInt();
nbVariables = f.readInt();
if (k == nil){
k = 2;
}
for[o in 1..nbObservations]{
for[v in 1..nbVariables]{
M[o][v] = f.readDouble();
}
clusters[o] = f.readString();
}
// mini[v] and maxi[v] are respectively the minimum and maximum of the vth variable
for[v in 1..nbVariables]{
mini[v] = min[o in 1..nbObservations](M[o][v]);
maxi[v] = max[o in 1..nbObservations](M[o][v]);
}
}
/* Declares the optimization model. */
function model(){
// x[c][v] is the coordinate for the center of gravity of the cluster c along the variable v
x[1..k][v in 1..nbVariables] <- float(mini[v], maxi[v]);
// d[o] is the distance between observation o and the nearest cluster (its center of gravity)
d[o in 1..nbObservations] <- min[c in 1..k](sum[v in 1..nbVariables](pow(x[c][v] - M[o][v], 2)));
// Minimize the total distance
obj <- sum[o in 1..nbObservations](d[o]);
minimize obj;
}
/* Parameterizes the solver. */
function param(){
if(lsTimeLimit == nil) lsTimeLimit=5;
}
/* Writes the solution in a file in the following format:
* - objective value
* - k
* - for each cluster, the coordinates along each variable
*/
function output(){
if(solFileName == nil) return;
local solFile = io.openWrite(solFileName);
solFile.println(obj.value);
solFile.println(k);
for[c in 1..k] {
for[v in 1..nbVariables] {
solFile.print(x[c][v].value + " ");
}
solFile.println();
}
}
- Execution (Windows)
- set PYTHONPATH=%LS_HOME%\bin\python27\python kmeans.py instances\irit.dat
- Execution (Linux)
- export PYTHONPATH=/opt/localsolver_XXX/bin/python27/python kmeans.py instances/irit.dat
########## kmeans.py ##########
import localsolver
import sys
if len(sys.argv) < 2:
print ("Usage: python kmeans.py inputFile [outputFile] [timeLimit] [k value]")
sys.exit(1)
def read_elem(filename):
with open(filename) as f:
return [str(elem) for elem in f.read().split()]
with localsolver.LocalSolver() as ls:
#
# Reads instance data
#
if len(sys.argv) > 4: k = int(sys.argv[4])
else: k = 2
file_it = iter(read_elem(sys.argv[1]))
# Data properties
nbObservations = int(file_it.next())
nbVariables = int(file_it.next())
M = [None]*nbObservations
qualitative = [None]*nbObservations
for o in range(nbObservations):
M[o] = [None]*(nbVariables)
for v in range(nbVariables):
M[o][v] = float(file_it.next())
qualitative[o] = file_it.next()
# mini[v] and maxi[v] are respectively the minimum and maximum of the vth variable
mini = [min(M[o][v] for o in range(nbObservations)) for v in range(nbVariables)]
maxi = [max(M[o][v] for o in range(nbObservations)) for v in range(nbVariables)]
#
# Declares the optimization model
#
model = ls.model
# x[o][v] are decision variables equal to the center of gravity of the kth group for the jit variable
x = [[model.float(mini[v], maxi[v]) for v in range(nbVariables)] for c in range(k)]
# d[o] is the distance between observation o and the nearest cluster (its center of gravity)
d = [model.min(model.sum((x[c][v] - M[o][v])**2 for v in range(nbVariables)) for c in range(k))
for o in range(nbObservations)]
# Minimize the total distance
obj = model.sum(d)
model.minimize(obj)
model.close()
#
# Parameterizes the solver
#
ls.param.nb_threads = 2
if len(sys.argv) > 3: ls.create_phase().time_limit = int(sys.argv[3])
else: ls.create_phase().time_limit = 5
ls.solve()
#
# Writes the solution in a file in the following format:
# - objective value
# - k
# - for each cluster, the coordinates along each variable
#
if len(sys.argv) > 2:
with open(sys.argv[2], 'w') as f:
f.write("%f\n" % obj.value)
f.write("%d\n" % k)
for c in range(k):
for v in range(nbVariables):
f.write("%f " % x[c][v].value)
f.write("\n")
- Compilation / Execution (Windows)
- cl /EHsc kmeans.cpp -I%LS_HOME%\include /link %LS_HOME%\bin\localsolver.dll.libkmeans instances\irit.dat
- Compilation / Execution (Linux)
- g++ kmeans.cpp -I/opt/localsolver_XXX/include -llocalsolver -lpthread -o kmeans./kmeans instances/irit.dat
/********** kmeans.cpp **********/
#include <iostream>
#include <fstream>
#include <vector>
#include <limits>
#include "localsolver.h"
using namespace localsolver;
using namespace std;
class Kmeans {
public:
/* Data properties */
int nbObservations;
int nbVariables;
int k;
vector<vector<lsdouble> > M;
vector<string> clusters;
// mini[v] and maxi[v] are respectively the minimum and maximum of the vth variable
vector<lsdouble> mini;
vector<lsdouble> maxi;
/* Solver. */
LocalSolver localsolver;
/* Decisions */
vector<vector<LSExpression> > x;
/* Objective */
LSExpression obj;
/* Constructor */
Kmeans(int k) : k(k) {
if (k < 1) {
cerr << "Error: k=" << k << " it is supposed to be greater or equal than 1 " << endl;
exit(1);
}
}
/* Reads instance data */
void readInstance(const string& fileName) {
ifstream infile(fileName.c_str());
if (!infile.is_open()) {
cerr << "File " << fileName << " cannot be opened." << endl;
exit(1);
}
infile >> nbObservations;
infile >> nbVariables;
M.resize(nbObservations);
clusters.resize(nbObservations);
mini.resize(nbVariables, numeric_limits<lsdouble>::infinity());
maxi.resize(nbVariables, -numeric_limits<lsdouble>::infinity());
for (int o = 0; o < nbObservations; o++) {
M[o].resize(nbVariables);
for (int v = 0; v < nbVariables; v++) {
infile >> M[o][v];
if (M[o][v] < mini[v]) mini[v] = M[o][v];
if (M[o][v] > maxi[v]) maxi[v] = M[o][v];
}
infile >> clusters[o];
}
}
void solve(int limit) {
try {
/* Declares the optimization model. */
LSModel model = localsolver.getModel();
// x[c][v] is the coordinate for the center of gravity of the cluster c along the variable v
x.resize(k);
for (int c = 0; c < k; c++) {
x[c].resize(nbVariables);
for (int v = 0; v < nbVariables; v++) {
x[c][v] = model.floatVar(mini[v], maxi[v]);
}
}
// d[o] is the distance between observation o and the nearest cluster (its center of gravity)
vector<LSExpression> d(nbObservations);
for(int o = 0; o < nbObservations; o++) {
d[o] = model.min();
for(int c = 0; c < k; c++) {
LSExpression distanceCluster = model.sum();
for(int v = 0; v < nbVariables; v++) {
distanceCluster += model.pow(x[c][v] - M[o][v], 2);
}
d[o].addOperand(distanceCluster);
}
}
// Minimize the total distance
obj = model.sum(d.begin(), d.end());
model.minimize(obj);
model.close();
/* Parameterizes the solver. */
LSPhase phase = localsolver.createPhase();
phase.setTimeLimit(limit);
localsolver.solve();
} catch (const LSException& e) {
cerr << "LSException:" << e.getMessage() << endl;
exit(1);
}
}
/* Writes the solution in a file in the following format:
* - objective value
* - k
* - for each cluster, the coordinates along each variable
*/
void writeSolution(const string& fileName) {
ofstream outfile(fileName.c_str());
if (!outfile.is_open()) {
cerr << "File " << fileName << " cannot be opened." << endl;
exit(1);
}
outfile << obj.getDoubleValue() << endl;
outfile << k << endl;
for(int c = 0; c < k; c++) {
for(int v = 0; v < nbVariables; v++) {
outfile << x[c][v].getDoubleValue() << " ";
}
outfile << endl;
}
outfile.close();
}
};
int main(int argc, char** argv) {
if (argc < 2) {
cerr << "Usage: kmeans inputFile [outputFile] [timeLimit] [k value]" << endl;
exit(1);
}
const char* instanceFile = argv[1];
const char* solFile = argc > 2 ? argv[2] : NULL;
const char* strTimeLimit = argc > 3 ? argv[3] : "5";
const char* k = argc > 4 ? argv[4] : "2";
Kmeans model(atoi(k));
model.readInstance(instanceFile);
model.solve(atoi(strTimeLimit));
if(solFile != NULL)
model.writeSolution(solFile);
return 0;
}
- Compilation / Execution (Windows)
- javac Kmeans.java -cp %LS_HOME%\bin\localsolver.jarjava -cp %LS_HOME%\bin\localsolver.jar;. Kmeans instances\irit.dat
- Compilation/Execution (Linux)
- javac Kmeans.java -cp /opt/localsolver_XXX/bin/localsolver.jarjava -cp /opt/localsolver_XXX/bin/localsolver.jar:. Kmeans instances/irit.dat
/********** Kmeans.java **********/
import java.util.*;
import java.io.*;
import localsolver.*;
public class Kmeans {
/* Data properties */
int nbObservations;
int nbVariables;
int k;
double[][] M;
String[] clusters;
// mini[v] and maxi[v] are respectively the minimum and maximum of the vth variable
double[] mini;
double[] maxi;
/* Solver. */
LocalSolver localsolver;
/* Decisions. */
LSExpression[][] x;
/* Objective. */
LSExpression obj;
Kmeans(int k) {
this.k = k;
}
/* Reads instance data */
void readInstance(String fileName) {
try {
Scanner input = new Scanner(new File(fileName));
input.useLocale(Locale.ROOT);
nbObservations = input.nextInt();
nbVariables = input.nextInt();
M = new double[nbObservations][nbVariables];
clusters = new String[nbObservations];
mini = new double[nbVariables];
maxi = new double[nbVariables];
Arrays.fill(mini, Double.POSITIVE_INFINITY);
Arrays.fill(maxi, Double.NEGATIVE_INFINITY);
for (int o = 0; o < nbObservations; o++) {
for (int v = 0; v < nbVariables; v++) {
M[o][v] = input.nextDouble();
if (M[o][v] < mini[v]) mini[v] = M[o][v];
else if (M[o][v] > maxi[v]) maxi[v] = M[o][v];
}
clusters[o] = input.next();
}
} catch (IOException ex) {
ex.printStackTrace();
}
}
void solve(int limit) {
try {
localsolver = new LocalSolver();
/* Declares the optimization model. */
LSModel model = localsolver.getModel();
// x[c][v] is the coordinate for the center of gravity of the cluster c along the variable v
x = new LSExpression[k][nbVariables];
for (int c = 0; c < k; c++) {
for (int v = 0; v < nbVariables; v++) {
x[c][v] = model.floatVar(mini[v] ,maxi[v]);
}
}
// d[o] is the distance between observation o and the nearest cluster (its center of gravity)
LSExpression[] d = new LSExpression[nbObservations];
for (int o = 0; o < nbObservations; o++) {
d[o] = model.min();
for (int c = 0; c < k; c++) {
LSExpression distanceCluster = model.sum();
for (int v = 0; v < nbVariables; v++) {
distanceCluster.addOperand(model.pow(model.sub(x[c][v], M[o][v]), 2));
}
d[o].addOperand(distanceCluster);
}
}
// Minimize the total distance
obj = model.sum(d);
model.minimize(obj);
model.close();
/* Parameterizes the solver. */
LSPhase phase = localsolver.createPhase();
phase.setTimeLimit(limit);
localsolver.solve();
} catch (LSException e) {
System.out.println("LSException:" + e.getMessage());
System.exit(1);
}
}
/* Writes the solution in a file in the following format:
* - objective value
* - k
* - for each cluster, the coordinates along each variable
*/
void writeSolution(String fileName) {
try {
BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
output.write(obj.getDoubleValue() + "\n");
output.write(k + "\n");
for (int c = 0; c < k; c++) {
for (int v = 0; v < nbVariables; v++) {
output.write(x[c][v].getDoubleValue() + " ");
}
output.write("\n");
}
output.close();
} catch (IOException ex) {
ex.printStackTrace();
}
}
public static void main(String[] args) {
if (args.length < 1) {
System.out.println("Usage: java Kmeans inputFile [outputFile] [timeLimit] [k value]");
System.exit(1);
}
String instanceFile = args[0];
String outputFile = args.length > 1 ? args[1] : null;
String strTimeLimit = args.length > 2 ? args[2] : "5";
String k = args.length > 3 ? args[3] : "2";
Kmeans model = new Kmeans(Integer.parseInt(k));
model.readInstance(instanceFile);
model.solve(Integer.parseInt(strTimeLimit));
if(outputFile != null) {
model.writeSolution(outputFile);
}
}
}
- Compilation/Execution (Windows)
- copy %LS_HOME%\bin\*net.dll .csc Kmeans.cs /reference:localsolvernet.dllKmeans instances\irit.dat
/********** kmeans.cs **********/
using System;
using System.IO;
using localsolver;
public class Kmeans : IDisposable
{
/* Data properties */
int nbObservations;
int nbVariables;
int k;
double[,] M;
string[] clusters;
// mini[v] and maxi[v] are respectively the minimum and maximum of the vth variable
double[] mini;
double[] maxi;
/* Solver. */
LocalSolver localsolver;
/* Decisions. */
LSExpression[,] x;
/* Objective. */
LSExpression obj;
public Kmeans(int k)
{
localsolver = new LocalSolver();
this.k = k;
}
/* Reads instance data */
public void ReadInstance (string fileName)
{
using (StreamReader input = new StreamReader(fileName))
{
string[] splittedLine = input.ReadLine().Split();
nbObservations = int.Parse(splittedLine[0]);
nbVariables = int.Parse(splittedLine[1]);
mini = new double[nbVariables];
maxi = new double[nbVariables];
for (int v = 0; v < nbVariables; v++)
{
mini[v] = double.PositiveInfinity;
maxi[v] = double.NegativeInfinity;
}
M = new double[nbObservations,nbVariables];
clusters = new string[nbObservations];
for (int o = 0; o < nbObservations; o++)
{
splittedLine = input.ReadLine().Split();
for (int v = 0; v < nbVariables; v++)
{
M[o,v] = double.Parse(splittedLine[v]);
if (M[o,v] < mini[v]) mini[v] = M[o,v];
if (M[o,v] > maxi[v]) maxi[v] = M[o,v];
}
clusters[o] = splittedLine[nbVariables];
}
}
}
public void Dispose ()
{
localsolver.Dispose();
}
public void Solve(int limit)
{
/* Declares the optimization model */
LSModel model = localsolver.GetModel();
// x[c][v] is the coordinate for the center of gravity of the cluster c along the variable v
x = new LSExpression[k,nbVariables];
for (int o = 0; o < k; o++)
{
for (int v = 0; v < nbVariables; v++)
{
x[o,v] = model.Float(mini[v] ,maxi[v]);
}
}
// d[o] is the distance between observation o and the nearest cluster (its center of gravity)
LSExpression[] d = new LSExpression[nbObservations];
for (int o = 0; o < nbObservations; o++)
{
d[o] = model.Min();
for (int c = 0; c < k; c++)
{
LSExpression distanceCluster = model.Sum();
for (int v = 0; v < nbVariables; v++)
{
distanceCluster.AddOperand(model.Pow(x[c, v] - M[o, v], 2));
}
d[o].AddOperand(distanceCluster);
}
}
// Minimize the total distance
obj = model.Sum(d);
model.Minimize(obj);
model.Close();
/* Parameterizes the solver. */
LSPhase phase = localsolver.CreatePhase();
phase.SetTimeLimit(limit);
localsolver.Solve();
}
/* Writes the solution in a file in the following format:
* - objective value
* - k
* - for each cluster, the coordinates along each variable
*/
public void WriteSolution(string fileName)
{
using (StreamWriter output = new StreamWriter(fileName))
{
output.WriteLine(obj.GetDoubleValue());
output.WriteLine(k);
for (int c = 0; c < k; c++)
{
for (int v = 0; v < nbVariables; v++)
{
output.Write(x[c, v].GetDoubleValue() + " ");
}
output.WriteLine();
}
output.Close();
}
}
public static void Main(string[] args)
{
if (args.Length < 1)
{
Console.WriteLine("Usage: Kmeans inputFile [outputFile] [timeLimit] [k value]");
System.Environment.Exit(1);
}
string instanceFile = args[0];
string outputFile = args.Length > 1 ? args[1] : null;
string strTimeLimit = args.Length > 2 ? args[2] : "5";
string k = args.Length > 3 ? args[3] : "2";
using (Kmeans model = new Kmeans(int.Parse(k)))
{
model.ReadInstance(instanceFile);
model.Solve(int.Parse(strTimeLimit));
if(outputFile != null)
{
model.WriteSolution(outputFile);
}
}
}
}