python 数据分析的题目。
做题方法:使用 pandas 读取数据,然后分析。
知识点:pandas,正则表达式,py知识。
过程:不断使用 GPT,遇到有问题的地方自己分析,把分析的结果告诉 GPT,注意要使用 Data Analysis,中文效果不是很好。
import pandas as pd
import hashlib
import re
from datetime import datetimedef load_csv(file_path):return pd.read_csv(file_path)def preprocess_a_table(a_table):account_table_map = {}for _, row in a_table.iterrows():account = row['账号']table = row['操作表']if account not in account_table_map:account_table_map[account] = set()account_table_map[account].add(table)return account_table_map# row 是b表
def is_valid_operation(row, a_table, account_table_map):account = row['账号']operation = row['执行操作'].lower()# Extracting table name and operation typetable_name_match = re.search(r'from (\w+)', operation) or re.search(r'into (\w+)', operation) or re.search(r'update (\w+)', operation)table_name = table_name_match.group(1) if table_name_match else Noneoperation_type_match = re.match(r'(\w+)', operation)operation_type = operation_type_match.group(1) if operation_type_match else None# Check if account is not in account_table_map or if the table_name is not in the account's permissible tablesif account not in account_table_map or (table_name and table_name not in account_table_map[account]):return '0_' + str(row['编号'])# Check for time and permission violationsfor _, account_row in a_table[a_table['账号'] == account].iterrows():if account_row['操作表'] == table_name:# Time checkoperation_time = datetime.strptime(row['操作时间'], '%Y/%m/%d %H:%M:%S').time()start_time, end_time = account_row['可操作时间段(时:分:秒)'].split('~')start_time = datetime.strptime(start_time, '%H:%M:%S').time()end_time = datetime.strptime(end_time, '%H:%M:%S').time()if not (start_time <= operation_time <= end_time):return str(account_row['编号']) + '_' + str(row['编号']) # Time violation# Permission checkpermissions = account_row['权限'].split(',')if operation_type and operation_type not in permissions:return str(account_row['编号']) + '_' + str(row['编号']) # Permission violationreturn Nonedef custom_sort(violation):parts = violation.split('_')return (int(parts[0]), int(parts[1]))def main():a_table_path = '../iris_data/a_table_test.csv'b_table_path = '../iris_data/b_table_test.csv'a_table = load_csv(a_table_path)b_table = load_csv(b_table_path)account_table_map = preprocess_a_table(a_table)violations = []for _, row in b_table.iterrows():violation = is_valid_operation(row, a_table, account_table_map)if violation:violations.append(violation)# Sort the violations based on the numerical part after the underscoreviolations_sorted = sorted(violations, key=custom_sort)# Join the sorted list into a stringviolations_joined = ','.join(violations_sorted)# Calculate MD5 hashmd5_hash = hashlib.md5(violations_joined.encode()).hexdigest()print(f"Sorted Violations: {violations_joined}")print(f"MD5 Hash: {md5_hash}")if __name__ == "__main__":main()