df的一些改列名,筛选操作
# Frequently used read_csv options:
#   index_col   - column (here given by position) to use as the row index
#   nrows       - read only the first N rows, handy for very large files
#   dtype       - per-column data types
#   parse_dates - columns to parse as datetimes; dtype= does not accept a
#                 datetime type, so date columns must be listed here instead
#   usecols     - read only the listed columns (a column named in
#                 parse_dates has to be part of this selection)
# The column names below are placeholders; replace them with real ones.
dat = pd.read_csv(
    path,
    index_col=1,
    nrows=1000,
    dtype={'列1': str},
    parse_dates=['报道时间'],
    usecols=['报道时间', '列1', '列2'],
)
# Change the index, rename columns, and convert column dtypes.
df = df.set_index(['date'])
# Map old column names to new ones; add more 'old': 'new' pairs as needed.
df = df.rename({'报道时间': 'report_date'}, axis=1)
# Two ways to get a datetime column: parse with to_datetime ...
df['report_date'] = pd.to_datetime(df['report_date'])
# ... or cast with astype; the dtype string is 'datetime64[ns]'
# (plain 'datetime' is not a dtype pandas understands).
df['report_date'] = df['report_date'].astype('datetime64[ns]')
df['stock_id'] = df['stock_id'].astype('string')
# Filter rows: keep only the rows whose stock_id is 921.
# NOTE(review): if stock_id has been cast to a string dtype, compare
# against '921' instead of the integer 921.
# Option 1: boolean mask.
df = df[df['stock_id'] == 921]
# Option 2: a callable passed to .loc (useful inside method chains); it
# receives the DataFrame and must return a boolean mask.
df = df.loc[lambda frame: frame['stock_id'] == 921]
# Calling reset_index() on a groupby result turns the group keys back into
# ordinary columns, so the result is a DataFrame again.
data1 = (
    sheet_test
    .groupby(['股票代码', '报道时间'])['daily_yield']
    .mean()
    .reset_index()
)
# Merge the '内容' lists of each group into one list
# (assumes every '内容' cell holds a list - TODO confirm).
data1 = (
    dat
    .groupby(['股票代码', '报道时间'])['内容']
    .apply(sum)
    .reset_index()
)
# Left join: keep every row of res and attach matching rows of ret.
sheet = pd.merge(left=res, right=ret, how='left', on=['股票代码', '报道时间'])
# Tip: split a datetime column into year, month and day via the .dt accessor.
years, months, days = (
    df['date'].dt.year,
    df['date'].dt.month,
    df['date'].dt.day,
)
# Top 10 rows per group: sort by 'A' in descending order first, then take
# the first 10 rows of every (B, D) group.
df2 = (
    df
    .sort_values(by='A', ascending=False)
    .groupby(['B', 'D'])
    .head(10)
)
# Add a within-group rank number: rank 'A' in descending order inside each
# (B, D) group, using dense ranking.
df['group_sortnum'] = (
    df.groupby(['B', 'D'])['A']
    .rank(method='dense', ascending=False)
)
# Drop the rows where the given column is NaN, then rebuild the index.
sheet = sheet.dropna(how='any', subset=['daily_yield'])
# reset_index() with default arguments keeps the previous index as a column.
sheet = sheet.reset_index()
# Join the words of each row with '|' as the separator.
def _join_with_bar(row):
    """Return the row's '内容' items joined by '|'."""
    return '|'.join(row['内容'])


dat1['内容'] = dat1.apply(_join_with_bar, axis=1)
# Conditional (ternary) expression inside a lambda.
# Missing values are replaced by 0 first; afterwards values equal to 0 are
# kept as they are and every other value becomes 1.
sheet['daily_yield'] = sheet['daily_yield'].fillna(0)
sheet['daily_yield'] = sheet['daily_yield'].apply(
    lambda value: 1 if value != 0 else value
)
# Convert a single column into a plain Python list.
word_list = df['word_list'].tolist()
# Generate a sequence of dates with pandas (both endpoints are included).
index = pd.date_range(start='6/1/2012', end='6/4/2012')
# Shift rows.
# A positive `periods` moves values down (later rows), a negative one moves
# them up (earlier rows); axis defaults to 0.
df['return'] = df['return'].shift(-1)
# With `freq`, the index itself is moved by the given offset; this only
# works for a datetime-like index. pd.Timedelta is used so the snippet does
# not depend on how the standard datetime module was imported.
df = df.shift(freq=pd.Timedelta(days=1))
# Select the rows that fall inside a given period.
# Option 1: make 'date' the index, then slice by label with .loc
# (assumes 'date' holds datetimes; both endpoints are included).
df = df.set_index(['date'])
df = df.loc['2012-06-01':'2012-06-05']
# Option 2: keep the date as an ordinary column and combine two comparisons
# into one boolean mask.
data = data[
    (data['Date'] >= pd.to_datetime('20120701'))
    & (data['Date'] <= pd.to_datetime('20120831'))
]
用字典(dict)构建一个DataFrame
# Build a DataFrame from a dict that maps column names to equal-length
# sequences.
np.random.seed(0)  # fixed seed so the random column is reproducible
date = pd.date_range(start='6/1/2012', end='6/10/2012')
stock_return = np.random.rand(10)
data = dict(date=date, stock_return=stock_return)
df = pd.DataFrame(data)
df
参考阅读
发表评论