基于随机梯度下降分类算法的活动推荐系统

发布时间:2024-12-10 02:19

个性化推荐算法:基于阅读历史和兴趣的推荐系统 #生活乐趣# #阅读乐趣# #电子书推荐#

本文基于随机梯度下降算法,构建了针对用户的活动推荐系统。

!pip install pycountry
!pip install kaggle-cli

#!kg download -u '<KAGGLE_USERNAME>' -p '<KAGGLE_PASSWORD>' -c event-recommendation-engine-challenge -f events.csv.gz
#!gzip -d events.csv.gz

0. 加载库

from __future__ import division, print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import datetime
import itertools
import hashlib
import locale
import pickle
import pycountry
import scipy.io as sio
import scipy.sparse as ss
import scipy.spatial.distance as ssd
from collections import defaultdict
from sklearn.preprocessing import normalize

# Authorise access to Google Drive from a Colab notebook.
# The PyDrive client library must be installed first; this only needs to be
# done once per notebook. In Colab, run the following shell magic in a cell:
#   !pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials


def login_google_drive():
    """Authenticate the current Colab user and return a PyDrive client.

    Returns:
        A ``GoogleDrive`` instance whose ``GoogleAuth`` object carries the
        application-default Google credentials.
    """
    # Authorisation step; per the original author's note, the interactive
    # consent prompt only appears the first time this runs.
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleCredentials.get_application_default()
    drive = GoogleDrive(gauth)
    return drive

# List the files stored in a Google Drive folder (the Drive root by default).
def list_file(drive, folder_id='root'):
    """Print the title, id and MIME type of every non-trashed file in a folder.

    Args:
        drive: A Drive client exposing ``ListFile(query).GetList()``, such as
            the object returned by ``login_google_drive()``.
        folder_id: Id of the parent folder to list. Defaults to ``'root'``,
            which reproduces the original behaviour of listing the Drive root.

    Returns:
        None. One line per file is written to standard output.
    """
    # The query keeps only direct children of the folder that are not trashed.
    query = {'q': "'%s' in parents and trashed=false" % folder_id}
    file_list = drive.ListFile(query).GetList()
    for file1 in file_list:
        print('title: %s, id: %s, mimeType: %s'
              % (file1['title'], file1['id'], file1["mimeType"]))

# Authenticate once and keep the Drive client for the cells that follow.
drive = login_google_drive()

# Show what is stored in the Drive root so the folder ids can be read off.
list_file(drive)

title: Colab, id: 1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC, mimeType: application/vnd.google-apps.folder
title: card2014.pdf, id: 0B6B4WgkpMXnrc3lYVkQybFZ4WkpCSnp1SW5DRGdxMnNSblhB, mimeType: application/pdf

# List the non-trashed contents of the "Colab" folder; the id in the query is
# the one printed for that folder by the root listing.
file_list = drive.ListFile(
    {'q': "'1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC' in parents and trashed=false"}
).GetList()
for file1 in file_list:
    print('title: %s, id: %s, mimeType: %s'
          % (file1['title'], file1['id'], file1["mimeType"]))

title: Kaggle-event recommendation, id: 1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG, mimeType: application/vnd.google-apps.folder
title: Credit Fraud Detection, id: 1WOmeGF8Muq1em9ZbeLFa-Ml221ErrBJJ, mimeType: application/vnd.google-apps.folder
title: LSTM, id: 1XhRk76UkPExDooz8oinsFlsS8e8qt8m2, mimeType: application/vnd.google-apps.folder

# List the non-trashed contents of the "Kaggle-event recommendation" folder;
# the id in the query is the one printed for that folder by the previous listing.
file_list = drive.ListFile(
    {'q': "'1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG' in parents and trashed=false"}
).GetList()
for file1 in file_list:
    print('title: %s, id: %s, mimeType: %s'
          % (file1['title'], file1['id'], file1["mimeType"]))

title: kaggle_event_recommendation, id: 13D1FY3MQU548QVqC8ooZAvvy4BgG0SSI, mimeType: application/vnd.google.colaboratory
title: test.csv, id: 1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl, mimeType: text/csv
title: train.csv, id: 1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT, mimeType: text/csv

# Cache the data files into the notebook's working environment.
def cache_data():
    """Download train.csv and test.csv from Google Drive to the working directory.

    Uses the module-level ``drive`` client, which must already exist when this
    function is called. Prints a confirmation message once both files have
    been fetched.
    """
    # Replace these ids with the file ids printed by the folder listing step.
    train_csv = drive.CreateFile({'id': "1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT"})
    test_csv = drive.CreateFile({'id': "1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl"})
    # The download here is only a local cache; it does not create an extra
    # copy of the file inside your Google Drive.
    train_csv.GetContentFile('train.csv', "text/csv")
    test_csv.GetContentFile('test.csv', "text/csv")
    print("缓存成功")


cache_data()

缓存成功

1. 定义数据清洗类

class DataCleaner(object): ''' Common utilities for converting strings to equivalent numbers or number buckets ''' def __init__(self): #载入locale self.localeIdMap = defaultdict(int) for i, l in enumerate(locale.locale_alias.keys()): self.localeIdMap[l] = i + 1 # 载入countries self.countryIdMap = defaultdict(int) self.ctryIdx = defaultdict(int) for i, c in enumerate(pycountry.countries): self.countryIdMap[c.name.lower()] = i + 1 if c.name.lower() == 'usa': self.ctryIdx['US'] = i if c.name.lower() == 'canada': self.ctryIdx['CA'] = i for cc in self.ctryIdx.keys(): for s in pycountry.subdivisions.get(country_code=cc): self.countryIdMap[s.name.lower()] = self.ctryIdx[cc] + 1 # 载入genderId self.genderIdMap = defaultdict(int, { 'male':1, 'female':2}) def getLocaleId(self, locstr): return self.localeIdMap[locstr.lower()] def getGenderId(self, genderStr): return self.genderIdMap[genderStr.lower()] def getJoinedYearMonth(self, dateString): dttm = datetime.datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%S.%fZ") return "".join([str(dttm.year), str(dttm.month)]) def getCountryId(self, location): if (isinstance(location, str) and len(location.strip()) > 0 and location.rfind(" ") > -1): return self.countryIdMap[location[location.rfind(

123456789101112131415161718192021222324252627282930313233343536373839404142

网址:基于随机梯度下降分类算法的活动推荐系统 https://www.yuejiaxmz.com/news/view/430110

相关内容

推荐系统:个性化推荐的算法与实践
【机器学习】推荐系统——基于用户行为分析的个性化推荐技术
基于协同过滤算法的绿色食品推荐系统
基于Java的个性化推荐系统:UserCF与ItemCF算法实现
基于深度学习的个性化音乐推荐系统
基于协作过滤算法的电子商务个性化推荐系统的研究
推荐系统的魅力:个性化推荐算法的发展趋势
基于spark的美食餐厅分析推荐系统
[推荐系统]基于个性化推荐系统研究与实现(2)
基于栈的推荐系统与个性化服务

随便看看