主要讲 TransH 和 TransE 代码的区别,并附 TransE 文章的链接

Java 版本 TransH 代码的学习(对照 TransE 理解)

关于范数的概念

什么是0范数、1范数、2范数?区别又是什么

 初始化向量

初始化关系平面的向量Wr,初始化向量relation_vec,初始化节点向量entity_vec

// Hyperplane normal vectors w_r: sampled uniformly from [-1, 1] per
// component, then rescaled to unit L2 length (TransH requires ||w_r|| = 1).
Wr_vec = new double[relation_num][vector_dimension];
for (int r = 0; r < relation_num; r++) {
    for (int d = 0; d < vector_dimension; d++) {
        Wr_vec[r][d] = uniform(-1, 1);
    }
    norm2one(Wr_vec[r]);
}

// Relation translation vectors d_r: uniform in [-1, 1]; no normalization here.
relation_vec = new double[relation_num][vector_dimension];
for (int r = 0; r < relation_num; r++) {
    for (int d = 0; d < vector_dimension; d++) {
        relation_vec[r][d] = uniform(-1, 1);
    }
}

// Entity embeddings: uniform in [-1, 1]; no normalization here either.
entity_vec = new double[entity_num][vector_dimension];
for (int e = 0; e < entity_num; e++) {
    for (int d = 0; d < vector_dimension; d++) {
        entity_vec[e][d] = uniform(-1, 1);
    }
}

负采样方法

对于 1-N 的关系,赋予更高的概率替换头实体,而对于 N-1 的关系,赋予更高的概率替换尾实体。具体地,对每个关系计算其 tph(每个头实体平均对应几个尾实体)和 hpt(每个尾实体平均对应几个头实体)。tph/(tph+hpt) 越大,说明越接近一对多的关系,在负采样时替换头实体,更容易获得 true negative。

tph:每个头实体平均对应几个尾实体

hpt:每个尾实体平均对应几个头实体

// tph: average number of tail entities per head entity of relation i.
left_num.put(i, sum / count); // tph
// hpt: average number of head entities per tail entity of relation i.
right_num.put(i, sum / count); // hpt

// Bernoulli sampling probability scaled to [0, 1000] ("bern" strategy,
// Wang et al., TransH): the larger hpt/(hpt+tph) relative weight, the
// more likely we corrupt the head of this triple.
double pr = 1000 * right_num.get(relation_id) / (right_num.get(relation_id) + left_num.get(relation_id));

if (method == 0) {
    // "unif" strategy: corrupt head/tail with equal probability.
    pr = 500;
}

// FIX: in the original snippet the closing braces sat on the same line
// AFTER a // comment ("{//替换头实体}"), so they were commented out and
// the if/else could not parse. Braces moved outside the comments.
if (rand() % 1000 < pr) {
    // replace the head entity
} else {
    // replace the tail entity
}

计算向量的分数

转换

通过转换,得到

第一个 for 循环,计算头、尾实体向量与法向量 Wr 的内积,得到投影系数 Wrh 和 Wrt

第二个 for 循环,对每一维的差值先平方再开方——因为每一维只是一个标量,这相当于取绝对值——再逐维累加,得到 L1 距离作为得分

// TransH score for a triple (head, relation, tail):
// project both entities onto the relation's hyperplane (normal w_r),
// then return the L1 distance || t_perp - d_r - h_perp ||_1.
static double calc_sum(int head, int tail, int relation) {
    // Projection coefficients <w_r, h> and <w_r, t>.
    double headDotW = 0;
    double tailDotW = 0;
    for (int d = 0; d < vector_dimension; d++) {
        headDotW += Wr_vec[relation][d] * entity_vec[head][d];
        tailDotW += Wr_vec[relation][d] * entity_vec[tail][d];
    }
    // Accumulate the per-dimension absolute residual.
    double score = 0;
    for (int d = 0; d < vector_dimension; d++) {
        double tailProj = entity_vec[tail][d] - tailDotW * Wr_vec[relation][d];
        double headProj = entity_vec[head][d] - headDotW * Wr_vec[relation][d];
        score += abs(tailProj - relation_vec[relation][d] - headProj);
    }
    return score;
}

梯度下降

// Margin-based ranking loss: update only when the positive triple's score
// sum1 is not at least `margin` below the corrupted triple's score sum2.
if (sum1 + margin > sum2) {
    // Hinge-loss contribution of this (positive, negative) pair.
    res = margin + sum1 - sum2;
    // Positive triple (a): push its score down (beta = -1).
    gradient(head_a, tail_a, relation_a, -1);
    // Corrupted triple (b): push its score up (beta = +1).
    gradient(head_b, tail_b, relation_b, 1);
}

// One in-place SGD step for a single (head, relation, tail) triple.
// beta = -1 decreases the score of a positive triple, beta = +1 increases
// the score of a corrupted one; learning_rate scales every update.
private static void gradient(int head, int tail, int relation, double beta) {
    // Projection coefficients <w_r, h> and <w_r, t> onto the hyperplane
    // normal Wr (same quantities as in calc_sum).
    double Wrh = 0;
    double Wrt = 0;
    for (int i = 0; i < vector_dimension; i++) {
        Wrh += Wr_vec[relation][i] * entity_vec[head][i];
        Wrt += Wr_vec[relation][i] * entity_vec[tail][i];
    }
    // Accumulates sign(delta_i) * Wr_i for the second Wr correction below.
    double sum = 0;
    for (int i = 0; i < vector_dimension; i++) {
        // delta_i = (t - <w,t>w) - d_r - (h - <w,h>w): per-dimension
        // residual of the projected translation.
        double delta = (entity_vec[tail][i] - Wrt * Wr_vec[relation][i])
                - relation_vec[relation][i]
                - (entity_vec[head][i] - Wrh * Wr_vec[relation][i]);
        // Subgradient of |delta| for the L1 score: the sign of delta.
        double x = (delta > 0) ? 1 : -1;
        sum += x * Wr_vec[relation][i];
        relation_vec[relation][i] -= beta * learning_rate * x;
        entity_vec[head][i] -= beta * learning_rate * x;
        entity_vec[tail][i] += beta * learning_rate * x;
        Wr_vec[relation][i] += beta * x * learning_rate * (Wrh - Wrt);
    }
    // NOTE(review): this loop reads entity_vec AFTER the in-place updates
    // above, so the Wr correction uses the already-updated entity vectors.
    // This matches the statement order of the reference implementation —
    // confirm it is intentional rather than a stale-read oversight.
    for (int i = 0; i < vector_dimension; i++) {
        Wr_vec[relation][i] += beta * learning_rate * sum * (entity_vec[head][i] - entity_vec[tail][i]);
    }
    // Re-impose the model constraints after the step:
    // embeddings inside the unit L2 ball ...
    norm(relation_vec[relation]);
    norm(entity_vec[head]);
    norm(entity_vec[tail]);
    // ... Wr at exactly unit length, then (approximate) orthogonality
    // between the relation vector d_r and the hyperplane normal w_r.
    norm2one(Wr_vec[relation]);
    norm(relation_vec[relation], Wr_vec[relation]);
}

使用L2范数归一化,先求平方和,再开方

// Example call site: pull the relation embedding back into the unit L2 ball.
norm(relation_vec[relation]);

// Euclidean (L2) length of a vector: sqrt of the sum of squared components.
// Generalized to iterate over the array's own length instead of the static
// vector_dimension field, and squares inline instead of calling the sqr
// helper, so the function is self-contained. Behavior is identical when
// a.length == vector_dimension (the only way it is called here).
static double vec_len(double[] a) {
    double res = 0;
    for (double v : a) {
        res += v * v;
    }
    return Math.sqrt(res);
}

// Project `a` back into the closed unit L2 ball: rescale to length 1 only
// when its current length exceeds 1, otherwise leave it untouched.
// Generalized to scale all a.length components (instead of the first
// vector_dimension), which is identical when a.length == vector_dimension.
static void norm(double[] a) {
    double len = vec_len(a);
    if (len > 1) {
        for (int i = 0; i < a.length; i++) {
            a[i] /= len;
        }
    }
}

dr与Wr的关系限制,dr就是relation,Wr是relation的映射平面

// Example call site: enforce the d_r / w_r orthogonality-style constraint.
norm(relation_vec[relation], Wr_vec[relation]);

// Constrain the relation vector a (= d_r) against the hyperplane normal Wr
// (= w_r): repeatedly renormalize Wr to unit length and, while the inner
// product <w_r, d_r> stays above the 0.1 tolerance, nudge both vectors
// apart by gradient steps; finish with a final unit-length normalization.
static void norm(double[] a, double[] Wr) {
    while (true) {
        // BUG FIX: `sum` was declared once OUTSIDE the while loop and never
        // reset, so from the second iteration on it accumulated the previous
        // iteration's sqrt value plus the new squares, yielding a wrong
        // normalizer for Wr. It must restart at 0 every iteration.
        double sum = 0;
        for (int i = 0; i < vector_dimension; i++) {
            sum += sqr(Wr[i]);
        }
        sum = sqrt(sum);
        // Renormalize Wr to unit L2 length before measuring the projection.
        for (int i = 0; i < vector_dimension; i++) {
            Wr[i] /= sum;
        }
        // x = <w_r, d_r>, the component of d_r along the normal.
        double x = 0;
        for (int i = 0; i < vector_dimension; i++) {
            x += Wr[i] * a[i];
        }
        if (x > 0.1) {
            // Still too aligned: push a and Wr away from each other.
            // `tmp` keeps the pre-update a[i] so both updates use the
            // same snapshot.
            for (int i = 0; i < vector_dimension; i++) {
                double tmp = a[i];
                a[i] -= learning_rate * Wr[i];
                Wr[i] -= learning_rate * tmp;
            }
        } else {
            break;
        }
    }
    // Leave Wr at exactly unit length on exit.
    norm2one(Wr);
}

好文阅读

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: