{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import json\n",
"import matplotlib.pyplot as plt\n",
"def load_data():\n",
" # 从文件导入数据\n",
" datafile = './housing.data'\n",
" data = np.fromfile(datafile, sep=' ')\n",
"\n",
" # 每条数据包括14项,其中前面13项是影响因素,第14项是相应的房屋价格中位数\n",
" feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \\\n",
" 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
" feature_num = len(feature_names)\n",
"\n",
" # 将原始数据进行Reshape,变成[N, 14]这样的形状\n",
" data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
"\n",
" # 将原数据集拆分成训练集和测试集\n",
" # 这里使用80%的数据做训练,20%的数据做测试\n",
" # 测试集和训练集必须是没有交集的\n",
" ratio = 0.8\n",
" offset = int(data.shape[0] * ratio)\n",
" training_data = data[:offset]\n",
"\n",
" # 计算训练集的最大值,最小值,平均值\n",
" maximums, minimums, avgs = training_data.max(axis=0), training_data.min(axis=0), \\\n",
" training_data.sum(axis=0) / training_data.shape[0]\n",
"\n",
" # 对数据进行归一化处理\n",
.....
机器学习房价预测数据集含使用百度飞桨重写的房价预测模型程序源代码说明.zip
(29.19 MB, 需要: RMB 29 元)
本附件包括:- eb62f90c25be0aec37f055f7b9633d9.png


雷达卡




京公网安备 11010802022788号







