【问题1】:银行卡欺诈问题

发布时间:2024-11-20 17:33

对税务问题保持警惕,防止欺诈 #生活技巧# #财务管理技巧# #个人税务#

下面是相关数据。这是一份已经脱敏并经过一定处理的数据,因此我们不需要再根据具体情况分析、处理相关特征。

如果需要做实验,请在评论区留下邮箱,我会把数据发给你。下面是代码部分,每一步的具体作用已在代码中详细说明。

import pandas as pd

import numpy as np

from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from sklearn.model_selection import KFold

from sklearn.metrics import confusion_matrix

from sklearn.metrics import precision_score, recall_score, f1_score

# ---------------------------------------------------------------------------
# Data preparation: load, scale, shuffle, undersample and split.
# ---------------------------------------------------------------------------

# Load the transactions table; the 'Class' column holds the binary label.
credit = pd.read_csv('./data/creditcard.csv')
print(credit.head())

# Row count per label value. Series.value_counts replaces the deprecated
# top-level pd.value_counts helper.
count_classes = credit['Class'].value_counts(sort=True)

# Standardise 'Amount' into a new 'normAmount' column, then drop the raw
# 'Time' and 'Amount' columns so they are not used as features.
credit['normAmount'] = StandardScaler().fit_transform(
    np.array(credit['Amount']).reshape(-1, 1)
)
credit = credit.drop(['Time', 'Amount'], axis=1)
columns_index = credit.columns

# Shuffle the rows and rebuild a clean 0..n-1 index. DataFrame.sample keeps
# each column's dtype, unlike a round-trip through a single NumPy array.
credit = credit.sample(frac=1).reset_index(drop=True)

# Full (imbalanced) feature matrix and label frame. The label is kept as a
# one-column DataFrame because downstream code indexes it with two axes.
X = credit.loc[:, credit.columns != 'Class']
y = credit.loc[:, credit.columns == 'Class']

# Undersampling: keep every Class == 1 row and draw an equally sized random
# subset of the Class == 0 rows.
bad_indices = credit[credit['Class'] == 1].index
good_indices = credit[credit['Class'] == 0].index
bad_num = len(bad_indices)

# replace=False so the same Class == 0 row cannot be drawn twice (the default
# samples with replacement). This requires bad_num <= len(good_indices).
random_good_indices = np.random.choice(good_indices, bad_num, replace=False)
random_good_indices = np.array(random_good_indices)

resample_indices = np.concatenate([bad_indices, random_good_indices])

# The collected values are index labels, so select with .loc rather than the
# positional .iloc.
resample_data = credit.loc[resample_indices, :]

resample_x = resample_data.loc[:, resample_data.columns != 'Class']
resample_y = resample_data.loc[:, resample_data.columns == 'Class']

# 75/25 train/test splits of the full data and of the undersampled data.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
re_x_train, re_x_test, re_y_train, re_y_test = train_test_split(
    resample_x, resample_y, test_size=0.25, random_state=42
)

def print_kfold(x_train_data, y_train_data):
    """Select the LogisticRegression ``C`` with the best mean 5-fold recall.

    For each candidate ``C`` an L1-penalised logistic regression is fitted on
    every K-fold training split and scored with recall on the matching
    validation split. Per-fold and mean recalls are printed along the way.

    Args:
        x_train_data: pandas DataFrame of features (indexed with ``.iloc``).
        y_train_data: pandas DataFrame holding the label column, with the
            same row order as ``x_train_data``.

    Returns:
        The ``C`` value from the candidate list whose mean recall across the
        folds is highest (the first one on ties, per ``idxmax``).
    """
    fold = KFold(n_splits=5, shuffle=False)
    c_param_range = [0.01, 0.1, 1, 10, 100]

    # One row per candidate C; the recall column is filled in below.
    result = pd.DataFrame(
        index=range(len(c_param_range)),
        columns=['C_parameter', 'Mean recall score'],
    )
    result['C_parameter'] = c_param_range

    for j, c_param in enumerate(c_param_range):
        print("*" * 100)
        print('C parameter:', c_param)

        recall_accs = []
        for train_index, test_index in fold.split(x_train_data, y_train_data):
            # The L1 penalty is not supported by the default 'lbfgs' solver,
            # so a solver that supports it must be named explicitly.
            lr = LogisticRegression(
                C=c_param, penalty='l1', solver='liblinear'
            )

            # Flatten the (n, 1) label frame to 1-D to avoid scikit-learn's
            # column-vector DataConversionWarning.
            fold_y_train = y_train_data.iloc[train_index, :].values.ravel()
            fold_y_valid = y_train_data.iloc[test_index, :].values.ravel()

            lr.fit(x_train_data.iloc[train_index, :], fold_y_train)
            y_pred = lr.predict(x_train_data.iloc[test_index, :])

            recall = recall_score(fold_y_valid, y_pred)
            recall_accs.append(recall)
            print("此次召回率:", recall)
            # Running mean over the folds evaluated so far for this C.
            print('平均召回率:', np.mean(recall_accs))

        mean_recall = np.mean(recall_accs)
        result.loc[j, 'Mean recall score'] = mean_recall
        print('平均召回率为:', mean_recall)

    print(result)

    # The column was created empty (object dtype); idxmax needs numbers.
    result['Mean recall score'] = result['Mean recall score'].astype('float64')
    best_c = result.loc[result['Mean recall score'].idxmax()]['C_parameter']
    print("最好的参数C:", best_c)
    return best_c

# ---------------------------------------------------------------------------
# Final model: tune C on the undersampled training split, refit, evaluate.
# ---------------------------------------------------------------------------

# Choose the regularisation strength by cross-validated recall.
best_c = print_kfold(re_x_train, re_y_train)

# The L1 penalty is not supported by the default 'lbfgs' solver, so a solver
# that supports it must be named explicitly.
lr = LogisticRegression(C=best_c, penalty='l1', solver='liblinear')

# Flatten the (n, 1) label frame to 1-D to avoid scikit-learn's
# column-vector DataConversionWarning.
lr.fit(re_x_train, re_y_train.values.ravel())

# Score on the held-out undersampled split rather than on the rows the model
# was just fitted on, so the metrics are not inflated by memorisation.
re_y_pred = lr.predict(re_x_test)
re_y_true = re_y_test.values.ravel()

matrix = confusion_matrix(re_y_true, re_y_pred)
print("混淆矩阵:\n", matrix)
print("精度:", precision_score(re_y_true, re_y_pred))
print("召回率:", recall_score(re_y_true, re_y_pred))
print("f1分数:", f1_score(re_y_true, re_y_pred))

最后的实验效果:

网址:【问题1】:银行卡欺诈问题 https://www.yuejiaxmz.com/news/view/158289

相关内容

卖的翻新二手问题产品,欺诈消费者是全新正品!
伪基站冒充银行客服发诈骗短信 招行信用卡提供防骗小贴士
个人在网络生活中,面对常见的网络安全问题该如何防范?(最全问答)
“汇聚金融力量 共创美好生活”,平安银行反诈趣营地助力进阶金融大“营”家
感情问题法律咨询
厦门易点生活电子商务有限公司小店订单代付欺诈
大华银行(中国)防范非法集资与金融诈骗
消费者投诉:万物新生欺诈消费者,无故拖延打款,京东平台无法解决
公民应该要如何防金融诈骗
常见问题

随便看看