本人因税务系统测试需要,需要生成一批人员数据用于测试。
测试人员数据要求:
- 姓名必须以“测试”开头,后续文字不限
- 手机号必须以 13333 开头
- 身份证上的出生日期必须满足年满 18 周岁
- 身份证号码必须以 828888 开头,且必须符合身份证校验码规则
以下是相关代码,运行后会生成 Excel 文件和 JSON 数据:
import random
import string
from datetime import datetime, timedelta
from faker import Faker
import json
import pandas as pd
# Create the Faker instance with the Chinese (zh_CN) locale; generate_name() draws names from it.
fake = Faker('zh_CN')
# ID-card checksum tables.
# Positional weights, i.e. 7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2, 1:
# the weight at index i equals 2 ** (17 - i) modulo 11.
weights = [pow(2, 17 - position, 11) for position in range(18)]
# Maps (weighted sum % 11) to the check character:
# 0 -> '1', 1 -> '0', 2 -> 'X', 3 -> '9', ..., 10 -> '2'.
check_digit_map = dict(enumerate("10X98765432"))
# ID-card numbers and phone numbers that were already generated; sets are used for de-duplication.
generated_id_cards, generated_phone_numbers = set(), set()
def generate_phone_number():
    """Return a not-yet-used 11-digit phone number that starts with "13333".

    The number is "13333" followed by 6 random digits. Every returned number
    is recorded in the module-level ``generated_phone_numbers`` set so that
    it is never returned twice.

    Raises:
        RuntimeError: if the registry already holds as many entries as there
            are possible suffixes, in which case retrying could never succeed.
    """
    prefix = "13333"
    suffix_length = 6
    # The registry only grows when this function succeeds, so one check before
    # the retry loop is enough to rule out an endless loop.
    if len(generated_phone_numbers) >= 10 ** suffix_length:
        raise RuntimeError("all phone numbers starting with 13333 are already in use")
    while True:
        phone = prefix + ''.join(random.choices(string.digits, k=suffix_length))
        if phone not in generated_phone_numbers:
            generated_phone_numbers.add(phone)
            return phone
def generate_gender(id_card: str) -> str:
    """Derive the gender label from an ID-card number.

    Args:
        id_card: the ID-card number; its second-to-last character must be a
            decimal digit.

    Returns:
        "男" (male) when the second-to-last digit is odd, otherwise "女" (female).

    Raises:
        ValueError: if the second-to-last character is not a digit.
        IndexError: if ``id_card`` has fewer than two characters.
    """
    gender_digit = int(id_card[-2])
    if gender_digit % 2 == 1:
        return "男"
    return "女"
def _years_before(day, years):
    """Return ``day`` moved back by ``years`` calendar years (Feb 29 falls back to Feb 28)."""
    try:
        return day.replace(year=day.year - years)
    except ValueError:
        # Only Feb 29 can fail here: the target year is not a leap year.
        return day.replace(year=day.year - years, day=28)


def generate_birthdate(reference_date=None, min_age=18, max_age=60):
    """Return a random birthdate for a person aged ``min_age``..``max_age``.

    Ages are computed with calendar years rather than ``365 * years`` days,
    so leap days cannot push the result below the minimum age.

    Args:
        reference_date: the day on which the age requirement must hold
            (a ``datetime`` or ``date``); defaults to today.
        min_age: minimum age in full years on ``reference_date`` (default 18).
        max_age: the earliest birthdate is exactly this many years before
            ``reference_date`` (default 60).

    Returns:
        A ``datetime`` at midnight between the two bounds, both inclusive.

    Raises:
        ValueError: if ``min_age`` is greater than ``max_age``.
    """
    if min_age > max_age:
        raise ValueError("min_age must not be greater than max_age")
    if reference_date is None:
        reference_date = datetime.now()
    # Normalise to midnight so that only whole days take part in the arithmetic.
    today = datetime(reference_date.year, reference_date.month, reference_date.day)
    latest_birthdate = _years_before(today, min_age)
    earliest_birthdate = _years_before(today, max_age)
    span_days = (latest_birthdate - earliest_birthdate).days
    return earliest_birthdate + timedelta(days=random.randint(0, span_days))
def calculate_check_digit(id_card_prefix):
    """Compute the check character for the first 17 digits of an ID-card number.

    Each digit is multiplied by the weight at the same position in the
    module-level ``weights`` list; the sum modulo 11 is then looked up in
    ``check_digit_map``.

    Args:
        id_card_prefix: string holding exactly 17 decimal digits.

    Returns:
        The check character: one of '0'-'9' or 'X'.

    Raises:
        ValueError: if ``id_card_prefix`` is not exactly 17 decimal digits.
    """
    # Without this check zip() would silently truncate or under-fill the
    # pairing and return a meaningless check character.
    if len(id_card_prefix) != 17 or not id_card_prefix.isdecimal():
        raise ValueError("id_card_prefix must consist of exactly 17 digits")
    weighted_sum = sum(int(digit) * weight for digit, weight in zip(id_card_prefix, weights))
    return check_digit_map[weighted_sum % 11]
def generate_id_card(birthdate):
    """Return a unique 18-character ID-card number for the given birthdate.

    Layout: fixed prefix "828888", the birthdate as YYYYMMDD, a random
    3-digit sequence code, and the check character computed by
    ``calculate_check_digit``. Every returned number is recorded in the
    module-level ``generated_id_cards`` set so it is never returned twice.

    Args:
        birthdate: object with a ``strftime`` method (date or datetime).

    Returns:
        The generated ID-card number as a string.
    """
    # Everything except the sequence code is fixed for a given birthdate,
    # so it is built once outside the retry loop.
    fixed_part = '828888' + birthdate.strftime("%Y%m%d")
    while True:
        sequence_code = ''.join(random.choices('0123456789', k=3))
        id_card_prefix = fixed_part + sequence_code
        id_card = id_card_prefix + calculate_check_digit(id_card_prefix)
        if id_card not in generated_id_cards:
            generated_id_cards.add(id_card)
            return id_card
def generate_name():
    """Return a random test-person name that starts with "测试".

    A name is drawn from the module-level Faker instance until one with
    2 to 4 characters comes up; the required "测试" marker is then put in
    front of it, so the result is 4 to 6 characters long.
    """
    while True:
        name = fake.name()
        if 2 <= len(name) <= 4:
            # Test-data rule: every name must begin with "测试".
            return "测试" + name
def generate_random_person():
    """Build one random person record.

    Returns:
        A dict with the keys "姓名" (name), "手机号" (phone number),
        "性别" (gender), "身份证号码" (ID-card number) and "出生日期"
        (birthdate formatted as YYYY-MM-DD). The gender is derived from the
        ID-card number, and the ID-card number embeds the birthdate.
    """
    birthdate = generate_birthdate()
    phone_number = generate_phone_number()
    id_card = generate_id_card(birthdate)
    gender = generate_gender(id_card)
    name = generate_name()
    return {
        "姓名": name,
        "手机号": phone_number,
        "性别": gender,
        "身份证号码": id_card,
        "出生日期": birthdate.strftime("%Y-%m-%d"),
    }
# Generate 2000 random person records in one batch.
random_people = [generate_random_person() for _ in range(2000)]

# Save as JSON; ensure_ascii=False keeps the Chinese text readable in the file.
with open("random_people.json", "w", encoding="utf-8") as f:
    json.dump(random_people, f, ensure_ascii=False, indent=4)

# Save as Excel, one row per person and without the index column.
df = pd.DataFrame(random_people)
df.to_excel("random_people.xlsx", index=False)

print("随机人员信息已保存到 'random_people.json' 和 'random_people.xlsx' 文件中。")
评论