项目地址:https://github.com/AaronEC/CBR-House-Price-Recommender

整个项目只有一个文件Recommender.py,我加了一些注释,方便看懂

# TODO:

# Impliment distance scoring?距离评分

# Calculation for bool value weighting? 布尔值所占比重的计算

# RETAIN new case (customers house) and add it to existing database 保留新案例并将新案例添加到数据库

# This import is just for showing monetary values formatted in local currency

# reeditor:早知晓

# time:2023.04.10

import locale

locale.setlocale(locale.LC_ALL, '') # 用于国际货币换算的,不用理会这个

class house:

""" 保存每个房子相关的所有信息 """

def __init__(self, row):

self.name = row[0]

self.price = float(row[1]) # 售价

self.date = int(row[2]) # 在售时长

self.distance = float(row[3]) # 距离

self.area = int(row[4]) # 使用面积

self.rooms = int(row[5]) # 房间数

self.bedrooms = int(row[6]) # 卧室数

self.detached = row[7] # 是否独立式

self.garage = row[8] # 是否可刀

self.energy = str(row[9]).strip('\n') #能源评级

# 根据通货膨胀调整价格 ( 每三个月增加3% )

for i in range(0, self.date, 3):

self.price += self.price * 0.03

# 设置初始值

self.value = 0

def value(house):

""" 基于权重值计算和更新已出售房子与顾客房子之间的关联度。

:type house: house[class]

:rtype: void(返回类型)

"""

adjustment = 0 #

if house.distance > 0.25:

print(f"\nHouse {house.name} too far away, disregarded")

house.value, house.price = 0, 0

else:

print(f"\nHouse {house.name} within distance, calculating...")

value = weights["distance"] # 获取distance的权重值直接赋给value

if house.area and customerHouse.area:

value += weights["area"] * (house.area / customerHouse.area)

if house.rooms and customerHouse.rooms:

value += weights["rooms"] * (house.rooms / customerHouse.rooms)

if house.bedrooms and customerHouse.bedrooms:

value += weights["bedrooms"] * (house.bedrooms / customerHouse.bedrooms)

if house.energy and customerHouse.energy:

value += weights["energy"] * (energyRating[house.energy] / energyRating[customerHouse.energy])

if house.detached == 'Y': # 对于有限的布尔值采用枚举法处理

value += weights["detached"]

if house.garage == 'Y':

value += weights["garage"]

if customerHouse.detached == 'N':

adjustment += weights["detached"]

if customerHouse.garage == 'N':

adjustment += weights["garage"]

house.value = round(value / (potential - adjustment), 2) # 四舍五入处理关联度

print(f"Relative value: {house.value}")

def saveHouse(file, savedHouse):

""" 将带有推荐值的顾客房子存储到数据库中,留着以后使用

:type file: 数据库文件名

:type savedHouse: house[class]

:rtype: void

"""

# Format house object ready for saving

savedHouse.name = len(houseDatabase) + 1 # 房子序号

savedHouse.price = round(savedHouse.price) # 四舍五入取整

savedHouse.energy = savedHouse.energy + "\n"

# Convert object to list

savedHouse = list(savedHouse.__dict__.values())

savedHouse.pop() # 删除 value 属性

# Convert list to string

outputString = ','.join(str(x) for x in savedHouse)

# Save string to .csv file

with open('Database.csv', 'a') as databaseOut:

# Check if exact house is already in database (to prevent double saving)

for line in databaseIn:

line = ','.join(str(x) for x in line)

if outputString.split(',', 1)[1] == line.split(',', 1)[1]: # 对比除房子序号外的其他属性值,若已经存在相同案例,则 break

print("Exact house already in database, not saving...")

break

# Save to database, if it is a unique entry

else:

print("House not already in database, saving...")

databaseOut.write(outputString)

# 定义计算关联度使用的权重 (基于专家知识)

weights = {

"distance": 4,

"area": 2,

"rooms": 2,

"bedrooms": 2,

"detached": 3,

"garage": 1,

"energy": 1

}

potential = sum(weights.values()) # 计算案例相似的参数

# 定义能源评价对应数值

energyRating = {

"A": 6,

"B": 5,

"C": 4,

"D": 3,

"E": 2,

"F": 1

}

# Send database files to 2d arrays ready for processing

houseIn = [line.split(',') for line in open('House.csv')] # 将待售房子信息转换为二维数组,[[表头信息],[数据信息]]

databaseIn = [line.split(',') for line in open('Database.csv')] # 将已售房子信息转换为二维数组,[[表头信息],[数据信息],…]

# Define object of class 'house' for customer house and reset price

customerHouse = house(houseIn[1])

customerHouse.price = 0

# Define comparison houses (array of objects of class 'house')

houseDatabase = []

for row in databaseIn[1:]: # 跳过表头

houseDatabase.append(house(row))

# Weighted comparisons between customer house and database houses

valueTotals = [] # 存储所有已售房子与顾客房子的关联度

for house in houseDatabase:

value(house)

valueTotals.append(house.value)

# Find closest database house value match to customer house value

bestMatchIndex = valueTotals.index(min(valueTotals, key=lambda x:abs(x-1)))

# Calculate estimated customer house price based on value adjusted price of best match house

customerHouse.price = houseDatabase[bestMatchIndex].price / min(valueTotals, key=lambda x:abs(x-1))

# Output results summary to terminal

print(f"""

------------------------------------------------------------------------------------

Closest match: House {houseDatabase[bestMatchIndex].name}

Relative weighted value: {houseDatabase[bestMatchIndex].value}

------------------------------------------------------------------------------------

Estimated customer house value: {locale.currency(customerHouse.price, grouping=True)}p

------------------------------------------------------------------------------------

""")

# Save customer house to database to improve future recommendation accuracy

userSave = ""

while userSave.lower() != "y" or userSave.lower() != "n":

userSave = input("Save valuation to database? (Y/N): \n")

if userSave.lower() == "y":

saveHouse('Database.csv', customerHouse)

break

elif userSave.lower() == "n":

print("Not saving")

break

else:

print("Invalid input, enter Y for yes, or N for no")

运行结果:

总结

麻雀虽小,五脏俱全。因为这个项目是大学布置的作业所以比较简单,但基本的框架都实现了。计算案例相似度部分有对于文本类型数据的处理,但由于文本型数据取值个数较少,所以原作者采用的类似枚举的方法按值分别计算相似度,所以这个地方的实现还要再看一下别的代码。

加油加油一直在路上,冲冲冲!!!

相关链接

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: