import numpy as np
import pandas as pd
# Data preprocessing and data extraction
# Build data table 1. The dict keys are written in the final column order
# (id, date, city, category, age, price), so no separate reordering is needed.
df1 = pd.DataFrame(
    {
        "id": [1001, 1002, 1003, 1004, 1005, 1006],
        # Six consecutive daily timestamps starting on 2022-01-02.
        "date": pd.date_range(start="20220102", periods=6),
        # NOTE(review): the first entry has a trailing space; it is kept
        # verbatim -- presumably dirty data for a later cleaning step.
        "city": [
            "Beijing ",
            "Hangzhou",
            "Shanghai",
            "Beijing",
            "Shanghai",
            "Shanghai",
        ],
        "category": ["100-B", "100-B", "110-A", "110-C", "210-C", "130-F"],
        "age": [31, 27, 25, 27, 29, 28],
        # Two of the six prices are missing (NaN).
        "price": [1000, np.nan, 2300, 5400, np.nan, 3330],
    }
)
# Build the source dictionary: one list of nine values per column.
col = dict(
    id=[1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009],
    gender=["F", "M", "F", "F", "M", "M", "M", "F", "F"],
    name=[
        "Jane",
        "Wan",
        "Summer",
        "Flore",
        "Wang",
        "Chung",
        "Dev",
        "Linda",
        "Lucy",
    ],
    salary=[3000, 5500, 3500, 4600, 4500, 6000, 3800, 6500, 3500],
)
# Build data table 2 from the dictionary; the column order follows the
# dictionary's key order (id, gender, name, salary).
df2 = pd.DataFrame(data=col, columns=list(col))