### 1.1

: 设原样本分别为 $x_1,x_2 \in \omega_1, x_3,x_4 \in \omega_2$ ，对其进行规范化得

: 分类面如图所示：

### 2.1

Write a program to implement the “batch perception” algorithm (see page 44 or 45 in PPT). (a). Starting with a = 0, apply your program to the training data from w1 and w2. Note the number of iterations required for convergence (即记录下收敛的步数). (b). Apply your program to the training data from w3 and w2. Again, note the number of iterations required for convergence. (c). Explain the difference between the iterations required in the above two cases.

: （代码见附录 batch_perception 函数）

(a) 收敛需要 24 步迭代，所得权重 [-30.4,34.09999999999997,34.0]
(b) 收敛需要 17 步迭代，所得权重 [41.40000000000002,-48.60000000000002,-19.0]
(c) 由下图可以发现，w1（蓝色）和 w2（橙色）的样本点相距较近，而 w2（橙色）与 w3（绿色）相距较远，仅有个别点较近。两类样本间隔越小，感知机收敛所需的迭代步数越多，因此情形 (a) 比情形 (b) 需要更多的迭代步数。

### 2.2

: （代码见附录 single_sample_relax 函数）

### 2.3

: （代码见附录 pseudo_inverse 函数）

• w1 和 w2 的分类权重：[-0.33391921600076024,0.28785693487557085,0.7826412336099762]
• w2 和 w3 的分类权重：[0.17987721564209358,-0.13701343448280026,-0.02977081723472505]
• w3 和 w4 的分类权重：[0.08276694629366121,0.12681539660924512,0.4783531495651045]

### 附录 代码

# `dot` and `pinv`, used by the classifiers below, live in the LinearAlgebra
# stdlib in Julia >= 0.7; the original file never imported it.
using LinearAlgebra

# Homework training data: wIJ is feature J (of 2) for class w_I, 10 samples
# per class. SAMPLE packs them column-wise, so SAMPLE[:, 2i-1:2i] selects
# class w_i. Note the w4 features are negated (`* -1`) exactly as given in
# the assignment data.
w11 = [0.1, 6.8, -3.5, 2.0, 4.1, 3.1, -0.8, 0.9, 5.0, 3.9];
w12 = [1.1, 7.1, -4.1, 2.7, 2.8, 5.0, -1.3, 1.2, 6.4, 4.0];
w21 = [7.1, -1.4, 4.5, 6.3, 4.2, 1.4, 2.4, 2.5, 8.4, 4.1];
w22 = [4.2, -4.3, 0.0, 1.6, 1.9, -3.2, -4.0, -6.1, 3.7, -2.2];
w31 = [-3.0, 0.5, 2.9, -0.1, -4.0, -1.3, -3.4, -4.1, -5.1, 1.9];
w32 = [-2.9, 8.7, 2.1, 5.2, 2.2, 3.7, 6.2, 3.4, 1.6, 5.1];
w41 = [2.0, 8.9, 4.2, 8.5, 6.7, 0.5, 5.3, 8.7, 7.1, 8.0] * -1;
w42 = [8.4, -0.2, 7.7, 3.2, 4.0, 9.2, 6.7, 6.4, 9.7, 6.3] * -1;
const SAMPLE = [w11 w12 w21 w22 w31 w32 w41 w42];

"""
    normalize_data(positive, negative)

Put two-class training data into the "normalized" form used by linear
discriminant algorithms: append a constant 1 to every sample (homogeneous /
augmented coordinates) and negate the negative-class samples, so that a
correct weight vector satisfies `dot(weight, y) > 0` for every column `y`.

# Arguments
- `positive`: n×d matrix, one positive-class sample per row.
- `negative`: n×d matrix, one negative-class sample per row (same shape).

# Returns
- `data`: (d+1)×2n matrix, one normalized sample per column.
- `weight`: zero vector of length d+1 (initial augmented weight).

# Throws
- `DimensionMismatch` if the two classes have different shapes.
"""
function normalize_data(positive, negative)
    # Append the homogeneous coordinate: x -> [x 1]
    augment = data -> [data ones(size(data, 1))]

    positive = augment(positive)
    negative = augment(negative) * -1  # sign-normalize the negative class
    # `assert(...)` was removed in Julia 1.0; validate explicitly instead.
    size(positive) == size(negative) ||
        throw(DimensionMismatch("positive and negative classes must have the same shape"))
    data = transpose([positive; negative])

    d, n = size(data)
    weight = zeros(d)

    data, weight
end

"""
    pseudo_inverse(positive, negative)

Minimum-squared-error (MSE) linear classifier via the pseudo-inverse:
solves `a = (Y Yᵀ)⁺ Y b` with margin vector `b = ones(n)`, where `Y`
holds one normalized sample per column. Returns the weight vector.
"""
function pseudo_inverse(positive, negative)
    samples, _ = normalize_data(positive, negative)
    _, count = size(samples)

    margins = ones(count)
    return pinv(samples * transpose(samples)) * samples * margins
end

"""
    single_sample_relax(positive, negative; eta=1, margin=1, epsilon=0.00001)

Single-sample relaxation algorithm with margin: repeatedly pick one sample
`y` violating `dot(weight, y) >= margin` (within tolerance `epsilon`) and
move `weight` by `eta * (margin - dot(weight, y)) / ||y||^2 * y`.

# Keywords
- `eta`: learning rate (step scale).
- `margin`: required margin `b`.
- `epsilon`: numerical tolerance on the margin test.

# Returns
- `(weight, step)`: final weight vector and the number of iterations taken.

Note: only terminates if the (augmented) data is linearly separable with
the requested margin.
"""
function single_sample_relax(positive, negative; eta=1, margin=1, epsilon=0.00001)
    data, weight = normalize_data(positive, negative)
    d, n = size(data)

    # First sample whose margin constraint is still violated, or `nothing`.
    function find_error(data, weight, margin)
        for i = 1:n
            if dot(weight, data[:, i]) <= margin - epsilon
                return data[:, i]
            end
        end
        nothing
    end

    step, y = 1, find_error(data, weight, margin)
    while y !== nothing
        gradient = (margin - dot(weight, y)) / dot(y, y) * y
        # BUG FIX: the original never applied the update, looping forever.
        weight += eta * gradient

        step += 1
        y = find_error(data, weight, margin)
    end

    weight, step
end

"""
    batch_perception(positive, negative; eta=1)

Batch perceptron algorithm: while any normalized sample `y` has
`dot(weight, y) <= 0`, update `weight += eta * sum(misclassified samples)`.

# Keywords
- `eta`: learning rate (default 1, matching the original behavior).

# Returns
- `(weight, step)`: final weight vector and the number of iterations taken.

Note: only terminates if the (augmented) data is linearly separable.
"""
function batch_perception(positive, negative; eta=1)
    data, weight = normalize_data(positive, negative)
    d, n = size(data)

    # All samples currently on the wrong side of the decision surface.
    function compute_error(data, weight)
        [data[:, i] for i = 1:n if dot(weight, data[:, i]) <= 0]
    end

    step, error_samples = 1, compute_error(data, weight)

    while length(error_samples) > 0
        # BUG FIX: the original never updated `weight`, looping forever.
        # Batch update: a(k+1) = a(k) + eta * sum over misclassified y.
        weight += eta * sum(error_samples)

        step += 1
        error_samples = compute_error(data, weight)
    end

    weight, step
end

"""
Write a program to implement the “batch perception” algorithm
(see page 44 or 45 in PPT).

(a). Starting with a = 0, apply your program to the training data from 1 and 2.
Note that the number of iterations required for convergence.

(b). Apply your program to the training data from 3 and 2.
Again, note that the number of iterations required for convergence.

(c). Explain the difference between the iterations required in the above two cases.

"""
function hw2_1()
weight, step = batch_perception(SAMPLE[:, 1:2], SAMPLE[:, 3:4])
println("batch perception w1 and w2: iteration steps: \$step, final weights: \$weight")
weight, step = batch_perception(SAMPLE[:, 3:4], SAMPLE[:, 5:6])
println("batch perception w2 and w3: iteration steps: \$step, final weights: \$weight")

weight, step = single_sample_relax(SAMPLE[:, 1:2], SAMPLE[:, 3:4])
println("single sample relax w1 and w2: iteration steps: \$step, final weights: \$weight")

weight = pseudo_inverse(SAMPLE[:, 1:2], SAMPLE[:, 3:4])
println("pseudo_inverse MSE w1 and w2: final weights: \$weight") weight = pseudo_inverse(SAMPLE[:, 3:4], SAMPLE[:, 5:6]) println("pseudo_inverse MSE w2 and w3: final weights: \$weight")
weight = pseudo_inverse(SAMPLE[:, 5:6], SAMPLE[:, 7:8])
println("pseudo_inverse MSE w3 and w4: final weights: \\$weight")

end

# Run the experiments only when the file is executed as a script;
# skip when included from an interactive REPL session.
if !isinteractive()
hw2_1()
end