メインコード

こちらのソースコード。

import #

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

from dataset_linear import make_data
from pusb_linear_kernel import PU
from densratio import densratio

色々import

experiment #

def experiment(datatype, udata):
	# 
    priors = [0.2, 0.4, 0.6, 0.8]
    ite = 100
    pdata = 400
    num_basis = 300

    seed = 2018

    est_error_pu = np.zeros((len(udata), len(priors), ite))
    est_error_pubp = np.zeros((len(udata), len(priors), ite))
    est_error_dr = np.zeros((len(udata), len(priors), ite))

	# 
    for i in range(len(udata)):
        u = udata[i]
        for j in range(len(priors)):
            pi = priors[j]
            for k in range(ite):
                np.random.seed(seed)
                #PN classification
                x, t = make_data(datatype=datatype)
                x = x/np.max(x, axis=0)
                one = np.ones((len(x),1))
                x_pn = np.concatenate([x, one], axis=1)
                classifier = LogisticRegression(C=0.01, penalty='l2')
                classifier.fit(x_pn, t) 
                                
                perm = np.random.permutation(len(x))
                x_train = x[perm[:-3000]]
                t_train = t[perm[:-3000]]
                x_test = x[perm[-3000:]]
                t_test = t[perm[-3000:]]
                
                xp = x_train[t_train==1]
                one = np.ones((len(xp),1))
                xp_temp = np.concatenate([xp, one], axis=1)
                xp_prob = classifier.predict_proba(xp_temp)[:,1]
                #xp_prob /= np.mean(xp_prob)
                xp_prob = xp_prob**20
                xp_prob /= np.max(xp_prob)
                rand = np.random.uniform(size=len(xp))
                temp = xp[xp_prob > rand]
                while (len(temp) < pdata):
                    rand = np.random.uniform(size=len(xp))
                    temp = np.concatenate([temp, xp[xp_prob > rand]], axis=0)
                xp = temp
                perm = np.random.permutation(len(xp))
                xp = xp[perm[:pdata]]
                updata = np.int(u*pi)
                undata = u - updata
                
                xp_temp = x_train[t_train==1]
                xn_temp = x_train[t_train==0]
                perm = np.random.permutation(len(xp_temp))
                xp_temp = xp_temp[perm[:updata]]
                
                perm = np.random.permutation(len(xn_temp))
                xn_temp = xn_temp[perm[:undata]]
                xu = np.concatenate([xp_temp, xn_temp], axis=0)
                
                x = np.concatenate([xp, xu], axis=0)

                tp = np.ones(len(xp))
                tu = np.zeros(len(xu))
                t = np.concatenate([tp, tu], axis=0)
                
                updata = np.int(1000*pi)
                undata = 1000 - updata
                
                xp_test = x_test[t_test == 1]
                perm = np.random.permutation(len(xp_test))
                xp_test = xp_test[perm[:updata]]
                xn_test = x_test[t_test == 0]
                perm = np.random.permutation(len(xn_test))
                xn_test = xn_test[perm[:undata]]
                
                x_test = np.concatenate([xp_test, xn_test], axis=0)
                tp = np.ones(len(xp_test))
                tu = np.zeros(len(xn_test))
                t_test = np.concatenate([tp, tu], axis=0)
                
                pu = PU(pi=pi)
                x_train = x
                res, x_test_kernel = pu.optimize(x, t, x_test)
                acc1 = pu.test(x_test_kernel, res, t_test, quant=False)
                acc2 = pu.test(x_test_kernel, res, t_test, quant=True, pi=pi)
                
                result = densratio(x_train[t==1], x_train[t==0])
                r = result.compute_density_ratio(x_test)
                temp = np.copy(r)
                temp = np.sort(temp)
                theta = temp[np.int(np.floor(len(x_test)*(1-pi)))]
                pred = np.zeros(len(x_test))
                pred[r > theta] = 1
                acc3 = np.mean(pred == t_test)
                
                est_error_pu[i, j, k] = acc1
                est_error_pubp[i, j, k] = acc2
                est_error_dr[i, j, k] = acc3

                seed += 1
                
                print(acc1)
                print(acc2)
                print(acc3)
    
    est_error_pu_mean = np.mean(est_error_pu, axis=2)
    est_error_pubp_mean = np.mean(est_error_pubp, axis=2)
    est_error_dr_mean = np.mean(est_error_dr, axis=2)
    est_error_pu_std = np.std(est_error_pu, axis=2)
    est_error_pubp_std = np.std(est_error_pubp, axis=2)
    est_error_dr_std = np.std(est_error_dr, axis=2)
    return est_error_pu_mean, est_error_pubp_mean, est_error_pu_std, est_error_pubp_std, est_error_dr_mean, est_error_dr_std