当有很多模型可以选择,而又不确定哪个模型更好时,可以直接逐个尝试。下面是我认为回归模型一般需要的流程,包括归一化、反归一化、均方误差、拟合曲线、预测曲线等;如有其他需求,稍作修改即可。代码如下:

# coding: utf-8

import numpy as np

from sklearn import svm

import matplotlib.pyplot as plt

import matplotlib as mpl

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import mean_squared_error

# Matplotlib text setup for the Chinese labels used in the plots below.
# ``mpl.rcParams`` and ``plt.rcParams`` are the same object, so each key is set
# exactly once here; the fonts are listed in order of preference and the later
# entry is only used when the earlier one is not installed.
mpl.rcParams['font.sans-serif'] = ['SimHei', 'FangSong']

# Render the minus sign as an ASCII hyphen; otherwise it can show up as an
# empty box with these fonts.
mpl.rcParams['axes.unicode_minus'] = False

def train_and_chose_model(model):
    """Fit ``model`` on a synthetic noisy sine curve, then report and plot it.

    The data set is 400 sorted samples of ``2 * sin(x)`` plus small Gaussian
    noise. Inputs and targets are min-max scaled to [0, 1], the first 80% of
    the (ordered) samples are used for training and the remaining 20% for
    testing. The function prints the model's ``score`` and the mean squared
    error on both splits, then shows a figure with the raw data and the
    predictions mapped back to the original scale.

    Args:
        model: An unfitted estimator exposing ``fit(X, y)``, ``score(X, y)``
            and ``predict(X)`` (for example ``sklearn.svm.SVR``). It is
            fitted in place.

    Returns:
        None. Results are printed and plotted.
    """
    # --- Build the data set -------------------------------------------------
    N = 400
    np.random.seed(0)  # fixed seed so every candidate model sees the same data
    data_x = np.sort(np.random.uniform(0, 22, N), axis=0)
    data_y = 2 * np.sin(data_x) + 0.01 * np.random.randn(N)
    data_x = data_x.reshape(-1, 1)
    data_y = data_y.reshape(-1, 1)

    # --- Normalise ----------------------------------------------------------
    # Two separate scalers are kept because x and y are inverse-transformed
    # independently before plotting.
    # NOTE(review): both scalers are fitted on the full data set, so the test
    # range leaks into the scaling; consider fitting on the training split only.
    scaler_1 = MinMaxScaler(feature_range=(0, 1))
    scaler_2 = MinMaxScaler(feature_range=(0, 1))
    data_x = scaler_1.fit_transform(data_x)
    data_y = scaler_2.fit_transform(data_y)

    # --- Ordered train/test split -------------------------------------------
    # The samples are sorted by x, so the test split lies entirely beyond the
    # training range (time-series style hold-out).
    size = int(N * 0.8)
    x_train = data_x[:size]
    x_test = data_x[size:]
    # Targets are flattened to 1-D, the shape single-output estimators expect.
    y_train = data_y[:size].ravel()
    # Fix: the test targets must come from data_y (they were taken from data_x).
    y_test = data_y[size:].ravel()

    # --- Fit and evaluate ---------------------------------------------------
    model.fit(x_train, y_train)

    # For scikit-learn regressors ``score`` is R^2, which is negative whenever
    # the model does worse than predicting the mean of the targets.
    score_test = model.score(x_test, y_test)
    score_train = model.score(x_train, y_train)
    print(str(model) + "训练集准确率为:" + str(score_train))
    print(str(model) + "测试集准确率为:" + str(score_test))

    y_test_pre = model.predict(x_test)
    y_train_pre = model.predict(x_train)

    # Fix: each label now reports the MSE of the split it names (the two were
    # swapped). Errors are measured in the normalised [0, 1] target space.
    print("测试集均方误差:", mean_squared_error(y_test, y_test_pre))
    print("训练集均方误差:", mean_squared_error(y_train, y_train_pre))

    # --- Map everything back to the original scale for plotting -------------
    data_x = scaler_1.inverse_transform(data_x)
    data_y = scaler_2.inverse_transform(data_y)
    x_train = scaler_1.inverse_transform(x_train)
    x_test = scaler_1.inverse_transform(x_test)
    y_test_pre = scaler_2.inverse_transform(y_test_pre.reshape(-1, 1))
    y_train_pre = scaler_2.inverse_transform(y_train_pre.reshape(-1, 1))

    # --- Plot: raw samples plus the train / test prediction curves ----------
    plt.figure(figsize=(9, 8), facecolor='w')
    plt.plot(x_train, y_train_pre, 'r-', linewidth=2, label=str(model) + "训练集")
    plt.plot(x_test, y_test_pre, 'b-', linewidth=2, label=str(model) + "测试集")
    plt.plot(data_x, data_y, 'mo', markersize=6)
    plt.legend(loc='lower left')
    plt.title(str(model), fontsize=16)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.grid(True)
    plt.show()

if __name__ == '__main__':
    # Candidate regressors to compare; append more estimators to this list.
    all_model = [svm.SVR(C=1.2)]

    # Each candidate is trained, scored and plotted in turn.
    for m in all_model:
        train_and_chose_model(m)

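输出结果:

SVR(C=1.2)训练集准确率为:0.8867929886065467
SVR(C=1.2)测试集准确率为:-672.747134789489
测试集均方误差: 0.012684302261909159
训练集均方误差: 2.44567194386932

讲道理,结果确实很难看,但是流程应该是对的。需要注意:产生上述输出的代码中,y_test 误取自 data_x 而不是 data_y,并且两条均方误差的标签写反了(0.0127 实际上是训练集的误差),所以测试集的得分和误差并没有真正对照测试目标。另外,这里的"准确率"是 model.score 返回的决定系数 R²,当模型比直接预测均值还差时,它就会是负数。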

相关阅读

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: