基于随机梯度下降分类算法的活动推荐系统

发布时间：2024-12-10 02:19

个性化推荐算法：基于阅读历史和兴趣的推荐系统 #生活乐趣# #阅读乐趣# #电子书推荐#

本文基于随机梯度下降算法，构建了针对用户的活动推荐系统。

!pip install pycountry !pip install kaggle-cli12

#!kg download -u '<your-kaggle-email>' -p '<your-kaggle-password>' -c event-recommendation-engine-challenge -f events.csv.gz
#!gzip -d events.csv.gz

0. 加载库

from __future__ import division, print_function import pandas as pd import numpy as np import matplotlib.pyplot as plt %matplotlib inline import seaborn as sns import datetime import itertools import hashlib import locale import pickle import pycountry import scipy.io as sio import scipy.sparse as ss import scipy.spatial.distance as ssd from collections import defaultdict from sklearn.preprocessing import normalize

1234567891011121314151617181920

# Authorised login to Google Drive.
#
# Install the PyDrive client library first; this only needs to be executed
# once per notebook (run it in its own notebook cell):
#   !pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials


def login_google_drive():
    """Authenticate the current Colab user and return a Drive client.

    Returns:
        A ``GoogleDrive`` instance built from a ``GoogleAuth`` object whose
        credentials are the application-default Google credentials.
    """
    # Authorised login; the authentication prompt only appears the first time.
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleCredentials.get_application_default()
    drive = GoogleDrive(gauth)
    return drive

# List the files stored in a Google Drive folder.
def list_file(drive, parent_id='root'):
    """Print title, id and MIME type of each non-trashed file in a folder.

    Args:
        drive: Drive client exposing ``ListFile(query).GetList()``; each
            returned item is indexed by ``'title'``, ``'id'`` and
            ``'mimeType'``.
        parent_id: Id of the folder whose direct children are listed.
            Defaults to ``'root'``, which keeps the original behaviour of
            listing the top level of the drive.
    """
    query = {'q': "'%s' in parents and trashed=false" % parent_id}
    file_list = drive.ListFile(query).GetList()
    for file1 in file_list:
        print('title: %s, id: %s, mimeType: %s'
              % (file1['title'], file1['id'], file1["mimeType"]))

# Log in once and keep the Drive client for the cells that follow.
drive = login_google_drive()

# Show what is stored at the top level of the drive.
list_file(drive)

title: Colab, id: 1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC, mimeType: application/vnd.google-apps.folder
title: card2014.pdf, id: 0B6B4WgkpMXnrc3lYVkQybFZ4WkpCSnp1SW5DRGdxMnNSblhB, mimeType: application/pdf

# List the contents of the "Colab" folder, using the folder id printed by
# the root listing.
file_list = drive.ListFile(
    {'q': "'1SUtiFjgLwfR1nyRu-80mKnRRhIa6mJHC' in parents and trashed=false"}
).GetList()
for file1 in file_list:
    print('title: %s, id: %s, mimeType: %s'
          % (file1['title'], file1['id'], file1["mimeType"]))

title: Kaggle-event recommendation, id: 1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG, mimeType: application/vnd.google-apps.folder
title: Credit Fraud Detection, id: 1WOmeGF8Muq1em9ZbeLFa-Ml221ErrBJJ, mimeType: application/vnd.google-apps.folder
title: LSTM, id: 1XhRk76UkPExDooz8oinsFlsS8e8qt8m2, mimeType: application/vnd.google-apps.folder

# List the contents of the "Kaggle-event recommendation" folder, using the
# folder id printed by the previous listing.
file_list = drive.ListFile(
    {'q': "'1Icj4Bcc6OxMTOQ1oqp2A084146ch5jKG' in parents and trashed=false"}
).GetList()
for file1 in file_list:
    print('title: %s, id: %s, mimeType: %s'
          % (file1['title'], file1['id'], file1["mimeType"]))

title: kaggle_event_recommendation, id: 13D1FY3MQU548QVqC8ooZAvvy4BgG0SSI, mimeType: application/vnd.google.colaboratory
title: test.csv, id: 1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl, mimeType: text/csv
title: train.csv, id: 1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT, mimeType: text/csv

# Cache the data files into the working environment.
def cache_data():
    """Fetch train.csv and test.csv from Google Drive into the working directory.

    Uses the module-level ``drive`` client. Each file is written locally
    under its own name and a confirmation message is printed at the end.
    """
    # Replace these ids with the matching file ids read in the previous step.
    train_csv = drive.CreateFile({'id': "1i8FtD9G3H5B1QcmWG8yZ4w3aq12ARQcT"})
    test_csv = drive.CreateFile({'id': "1Y7YXncoQuvv6z2J4_JP-hbtVVmyaJvIl"})
    # The download here is only a local cache; it does not create an extra
    # file in your Google Drive directory.
    train_csv.GetContentFile('train.csv', "text/csv")
    test_csv.GetContentFile('test.csv', "text/csv")
    print("缓存成功")


cache_data()

缓存成功

1. 定义数据清洗类

class DataCleaner(object): ''' Common utilities for converting strings to equivalent numbers or number buckets ''' def __init__(self): #载入locale self.localeIdMap = defaultdict(int) for i, l in enumerate(locale.locale_alias.keys()): self.localeIdMap[l] = i + 1 # 载入countries self.countryIdMap = defaultdict(int) self.ctryIdx = defaultdict(int) for i, c in enumerate(pycountry.countries): self.countryIdMap[c.name.lower()] = i + 1 if c.name.lower() == 'usa': self.ctryIdx['US'] = i if c.name.lower() == 'canada': self.ctryIdx['CA'] = i for cc in self.ctryIdx.keys(): for s in pycountry.subdivisions.get(country_code=cc): self.countryIdMap[s.name.lower()] = self.ctryIdx[cc] + 1 # 载入genderId self.genderIdMap = defaultdict(int, { 'male':1, 'female':2}) def getLocaleId(self, locstr): return self.localeIdMap[locstr.lower()] def getGenderId(self, genderStr): return self.genderIdMap[genderStr.lower()] def getJoinedYearMonth(self, dateString): dttm = datetime.datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%S.%fZ") return "".join([str(dttm.year), str(dttm.month)]) def getCountryId(self, location): if (isinstance(location, str) and len(location.strip()) > 0 and location.rfind(" ") > -1): return self.countryIdMap[location[location.rfind(

123456789101112131415161718192021222324252627282930313233343536373839404142

网址：基于随机梯度下降分类算法的活动推荐系统 https://www.yuejiaxmz.com/news/view/430110

上一篇：【校园保洁的意义

下一篇：VMware 安装出现“不能对软

基于随机梯度下降分类算法的活动推荐系统

0. 加载库

1. 定义数据清洗类

相关内容

随便看看

最新动态分享

热点动态分享

专题

推荐动态分享