【问题1】:银行卡欺诈问题
对税务问题保持警惕,防止欺诈 #生活技巧# #财务管理技巧# #个人税务#
下面是相关数据。这是一份脱敏数据,已经过一定的处理,因此我们不需要再对各个特征逐一进行分析和处理。
如果需要做实验,请在评论区留下邮箱,我会把数据发给你。下面是代码部分,每一步的具体作用在代码中已经进行了详细的阐述。
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
# ---------------------------------------------------------------------------
# Data preparation: load, scale, shuffle, undersample and split.
# ---------------------------------------------------------------------------

# Load the transaction table and show its first rows.
credit = pd.read_csv('./data/creditcard.csv')
print(credit.head())

# Number of rows per label; Series.value_counts replaces the deprecated
# top-level pd.value_counts.
count_classes = credit['Class'].value_counts(sort=True)

# Standardise 'Amount' into a new 'normAmount' column. StandardScaler expects
# a 2-D input, so the column is reshaped to (n, 1) and flattened again.
amount_2d = credit['Amount'].to_numpy().reshape(-1, 1)
credit['normAmount'] = StandardScaler().fit_transform(amount_2d).ravel()
credit = credit.drop(['Time', 'Amount'], axis=1)
columns_index = credit.columns

# Shuffle the rows and rebuild a 0..n-1 index so that index labels and row
# positions coincide for the sampling below.
credit = credit.sample(frac=1).reset_index(drop=True)

# Full (imbalanced) feature matrix and label column.
# Labels are kept as a one-column DataFrame, not a Series.
X = credit.loc[:, credit.columns != 'Class']
y = credit.loc[:, credit.columns == 'Class']

# Undersampling: keep every Class == 1 row and draw the same number of
# Class == 0 rows.
bad_num = len(credit[credit['Class'] == 1])
bad_indices = credit[credit['Class'] == 1].index
good_indices = credit[credit['Class'] == 0].index
# replace=False: np.random.choice samples WITH replacement by default, which
# would allow the same Class == 0 row to be drawn several times and end up in
# both the training and the test split.
random_good_indices = np.random.choice(good_indices, bad_num, replace=False)
resample_indices = np.concatenate([bad_indices, random_good_indices])
# The collected values are index labels, so select by label.
resample_data = credit.loc[resample_indices, :]
resample_x = resample_data.loc[:, resample_data.columns != 'Class']
resample_y = resample_data.loc[:, resample_data.columns == 'Class']

# 75% / 25% train/test splits of the full data and of the balanced subset.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)
re_x_train, re_x_test, re_y_train, re_y_test = train_test_split(
    resample_x, resample_y, test_size=0.25, random_state=42)
def print_kfold(x_train_data, y_train_data):
    """Choose the regularisation strength C by 5-fold cross-validated recall.

    For each candidate C an L1-penalised logistic regression is fitted on
    four folds and its recall is measured on the remaining fold. Per-fold and
    mean recall are printed, followed by a summary table.

    Args:
        x_train_data: pandas DataFrame of features, indexed positionally
            via ``.iloc``.
        y_train_data: labels aligned row-for-row with ``x_train_data``;
            either a Series or a one-column DataFrame.

    Returns:
        The candidate C with the highest mean recall (the first such value
        if several tie).
    """
    fold = KFold(n_splits=5, shuffle=False)
    c_param_range = [0.01, 0.1, 1, 10, 100]
    result = pd.DataFrame(index=range(len(c_param_range)),
                          columns=['C_parameter', 'Mean recall score'])
    result['C_parameter'] = c_param_range

    for row, c_param in enumerate(c_param_range):
        print("*" * 100)
        print('C parameter:', c_param)
        recall_accs = []
        for train_index, test_index in fold.split(x_train_data, y_train_data):
            # The L1 penalty is not supported by the default 'lbfgs' solver
            # (scikit-learn raises ValueError), so request 'liblinear'.
            lr = LogisticRegression(C=c_param, penalty='l1',
                                    solver='liblinear')
            # np.ravel gives the 1-D label vector scikit-learn expects,
            # whether the labels arrive as a Series or a one-column frame.
            lr.fit(x_train_data.iloc[train_index, :],
                   np.ravel(y_train_data.iloc[train_index]))
            y_pred = lr.predict(x_train_data.iloc[test_index, :])
            recall = recall_score(np.ravel(y_train_data.iloc[test_index]),
                                  y_pred)
            recall_accs.append(recall)
            print("此次召回率:", recall)
        mean_recall = np.mean(recall_accs)
        print('平均召回率:', mean_recall)
        result.loc[row, 'Mean recall score'] = mean_recall
        print('平均召回率为:', mean_recall)

    print(result)
    # The score column was filled element-wise and is of object dtype;
    # idxmax needs a numeric column.
    result['Mean recall score'] = result['Mean recall score'].astype('float64')
    best_c = result.loc[result['Mean recall score'].idxmax(), 'C_parameter']
    print("最好的参数C:", best_c)
    return best_c
# ---------------------------------------------------------------------------
# Final model: pick C by cross-validation, refit, and report metrics.
# ---------------------------------------------------------------------------

# Cross-validate on the training part of the balanced (undersampled) data.
best_c = print_kfold(re_x_train, re_y_train)

# The L1 penalty is not supported by the default 'lbfgs' solver
# (scikit-learn raises ValueError), so request 'liblinear'.
lr = LogisticRegression(C=best_c, penalty='l1', solver='liblinear')
# np.ravel flattens the one-column label frame into the 1-D vector that
# scikit-learn expects.
lr.fit(re_x_train, np.ravel(re_y_train))

# Score on the held-out 25% split; scoring the rows the model was fitted on
# would overstate its quality.
re_y_pred = lr.predict(re_x_test)
re_y_true = np.ravel(re_y_test)
matrix = confusion_matrix(re_y_true, re_y_pred)
print("混淆矩阵:\n", matrix)
print("精度:", precision_score(re_y_true, re_y_pred))
print("召回率:", recall_score(re_y_true, re_y_pred))
print("f1分数:", f1_score(re_y_true, re_y_pred))
最后的实验效果:
网址:【问题1】:银行卡欺诈问题 https://www.yuejiaxmz.com/news/view/158289
相关内容
卖的翻新二手问题产品,欺诈消费者是全新正品!伪基站冒充银行客服发诈骗短信 招行信用卡提供防骗小贴士
个人在网络生活中,面对常见的网络安全问题该如何防范?(最全问答)
“汇聚金融力量 共创美好生活”,平安银行反诈趣营地助力进阶金融大“营”家
感情问题法律咨询
厦门易点生活电子商务有限公司小店订单代付欺诈
大华银行(中国)防范非法集资与金融诈骗
消费者投诉:万物新生欺诈消费者,无故拖延打款,京东平台无法解决
公民应该要如何防金融诈骗
常见问题