function [dcpred_model] = dcpred_train(trainy, trainX, levels, gamma, c, ratio, ratio_clustering)
%DCPRED_TRAIN  Cluster the training set and fit one landmark-based model per cluster.
%
%   Usage:
%
%   [model] = dcpred_train(trainy, trainX, levels, gamma, c)
%   [model] = dcpred_train(trainy, trainX, levels, gamma, c, ratio)
%   [model] = dcpred_train(trainy, trainX, levels, gamma, c, ratio, ratio_clustering)
%
%   Input Arguments:
%         trainy -- training labels (n by 1 double array). The small-class
%                   shortcut below compares labels against 1 and -1.
%         trainX -- training samples (n by d "sparse" double array)
%         levels -- number of hierarchical levels; 2^levels clusters are used
%         gamma -- gamma in RBF kernel
%         c -- the penalty in kernel SVM (also passed to the linear solver)
%         ratio -- number of landmark points as a fraction of the number of
%                  support vectors of the per-cluster kernel SVM
%                  (optional, default 1; at least 3 landmarks are requested)
%         ratio_clustering -- the smallest clustering size, forwarded to
%                  hkmeans (optional, default 0.0001)
%
%   Output Arguments:
%         dcpred_model -- the structure contains:
%               dcpred_model.cluster_mode: 0 when hkmeans/hkmeans_predict was
%                     used (2^levels > 16), 1 when flat k-means was used
%               dcpred_model.hyperplanes: the clustering hyperplanes returned
%                     by hkmeans (cluster_mode 0) or the k-means centers
%                     (cluster_mode 1)
%               dcpred_model.num_map: 2^levels by 1 vector; allocated as zeros
%                     and not filled in by this function
%               dcpred_model.landmarks{i}: the landmark points for the i-th
%                     cluster (full matrix, transposed output of gen_samples);
%                     [] if no classifier was trained for the cluster
%               dcpred_model.quad{i}: set to [] for every trained cluster
%                     (the quadratic spanning terms are currently disabled)
%               dcpred_model.w{i}: weights applied to the landmark kernel
%                     features of the i-th cluster; [] if none was trained
%               dcpred_model.isoneclass{i}: 1000 -- no constant label is
%                     recorded (a classifier was trained, or the cluster is
%                     empty); otherwise the constant label assigned to the
%                     whole cluster
%               dcpred_model.levels: copy of the input LEVELS
%
%   Side effects: adds the relative LIBLINEAR and LIBSVM MATLAB folders to
%   the path and prints progress/timing messages.

addpath('../liblinear-1.96/matlab/');
addpath('../libsvm-3.14-nobias/matlab/');

% Use nargin instead of exist('name'): exist without the 'var' qualifier also
% reports files/functions with the same name on the path, which would
% silently skip the default.
if (nargin < 6 || isempty(ratio))
	ratio = 1;
end
if (nargin < 7 || isempty(ratio_clustering))
	ratio_clustering = 0.0001;
end

ntrain = size(trainX,1);
if (numel(trainy) ~= ntrain)
	error('dcpred_train:sizeMismatch', ...
		'trainy has %d labels but trainX has %d rows.', numel(trainy), ntrain);
end

% Clustering is fitted on a random subsample of at most 200000 points.
perm = randperm(ntrain);
nsubsamples = min(200000,ntrain);
subsample_idx = perm(1:nsubsamples);

fprintf('DcPredTrain: running hierarchical clustering. \n');
timebegin = cputime;

ncluster = 2^levels;
if (ncluster > 16)
	% Many clusters: hierarchical k-means, then assign every training point.
	[idx1 hyperplanes] = hkmeans(trainX(subsample_idx,:)', levels, 15, ratio_clustering);
	[idx innerproducts] = hkmeans_predict(trainX', levels, hyperplanes);
	dcpred_model.cluster_mode = 0;
else
	% Few clusters: flat k-means, then nearest-center assignment using
	% ||x||^2 + ||c||^2 - 2*x*c (centers are used as d by ncluster here).
	[idx1 centers] = mykmeans(trainX(subsample_idx,:)', ncluster, 15);
	dis = sum(trainX.*trainX,2)*ones(1,ncluster)+ones(ntrain,1)*(sum(centers.*centers,1))-2*trainX*centers;
	% Reduce explicitly along the cluster dimension. min(dis') collapses to a
	% single scalar when ncluster == 1 (dis' is then a row vector), which
	% produced a sample index instead of a per-sample cluster id.
	[ignored_min idx] = min(dis, [], 2);
	hyperplanes = centers;
	dcpred_model.cluster_mode = 1;
end

fprintf('DcPredTrain: clustering time: %g secs. \n', cputime - timebegin);
timebegin = cputime;
fprintf('DcPredTrain: training. \n');

num_map = zeros(ncluster,1);
libsvmcmd = sprintf('-c %g -g %g -m 8000 ', c, gamma );
liblinearcmd = sprintf('-c %g -s 2 -B -1 ', c);

dcpred_model.w = {};
dcpred_model.hyperplanes = hyperplanes;
dcpred_model.num_map = num_map;
dcpred_model.landmarks = {};
for i=1:ncluster
	dcpred_model.landmarks{i} = [];
	dcpred_model.w{i} = [];
	dcpred_model.isoneclass{i} = 1000;
end
dcpred_model.quad = {};
dcpred_model.levels = levels;

for i=1:ncluster
	subset = find(idx==i);
	if ( numel(subset) == 0)
		continue;
	end
	nowtrainy = trainy(subset);
	nowtrainX = trainX(subset,:);

	% Clusters that are pure, or nearly pure (at most 5 points of one of the
	% labels 1 / -1), get a constant prediction instead of a classifier.
	if ( numel(unique(nowtrainy)) == 1 )
		dcpred_model.isoneclass{i} = nowtrainy(1);
		continue;
	end
	if ( nnz(nowtrainy==1) <= 5 )
		dcpred_model.isoneclass{i} = -1;
		continue;
	end
	if ( nnz(nowtrainy==-1) <= 5 )
		dcpred_model.isoneclass{i} = 1;
		continue;
	end

	% Kernel SVM on the cluster; its support-vector count sets the number of
	% landmark points. Only the current model is needed, so it is kept in a
	% local variable rather than accumulated across clusters.
	kernel_svm = svmtrain(nowtrainy, nowtrainX, libsvmcmd);
	num = floor(sum(kernel_svm.nSV)*ratio);
	num = max(num, 3);
	[landmarkpoints] = gen_samples(nowtrainy, nowtrainX, kernel_svm, num, c, gamma);
	% Disabled alternative: use the support vectors themselves as landmarks
	% (landmarkpoints = kernel_svm.SVs).

	% Kernel features of the cluster points against the landmarks.
	% Disabled alternatives: appending exp(-innerproducts*gamma) columns and
	% pairwise product features from gen_pairs (stored in quad{i}).
	C = [rbf(nowtrainX, landmarkpoints, gamma)];
	dcpred_model.quad{i} = [];

	% Whalf = W^(-1/2) for the landmark kernel matrix W, computed via SVD and
	% restricted to singular values above 1e-3 for numerical stability.
	% Disabled alternatives: estimating W from a subsample of the cluster,
	% or Whalf = eye(size(C,2)).
	W = rbf(landmarkpoints, landmarkpoints, gamma);
	[u s v] = svd(W);
	ss = diag(s);
	nind = find(ss>1e-3);
	sss = sqrt(1./ss(nind));
	Whalf = u(:,nind)*diag(sss)*v(:,nind)';

	% Linear model on the whitened features; fold Whalf and the sign implied
	% by Label(1) into the stored weights so they apply directly to C.
	features = C*Whalf;
	linear_svm = train(nowtrainy, sparse(features), liblinearcmd);
	dcpred_model.w{i} = Whalf*(linear_svm.w')*linear_svm.Label(1);
	% Disabled alternative: derive w from the kernel SVM dual coefficients
	% (Whalf*Whalf*C'*alpha*kernel_svm.Label(1)).

	dcpred_model.landmarks{i} = full(landmarkpoints');

end

fprintf('DcPredTrain: training time %g secs. \n', cputime - timebegin);
