df的一些改列名,筛选操作

# read_csv options used here:
#   index_col   - which loaded column becomes the index
#   nrows       - read only the first N rows (handy for very large files)
#   dtype       - per-column data types
#   usecols     - load only the listed columns
#   parse_dates - columns to parse as datetimes; a datetime type cannot be
#                 requested through dtype, so date columns are listed here.
# NOTE(review): index_col=1 is kept from the original snippet; confirm that it
# picks the intended column once '报道时间' is part of usecols.
dat = pd.read_csv(
    path,
    index_col=1,
    nrows=1000,
    dtype={'列1': str},
    parse_dates=['报道时间'],
    usecols=['报道时间', '列1', '列2'],
)

# Change the index, rename columns, and convert columns to other dtypes.

# Use the 'date' column as the index.
df = df.set_index(['date'])

# Rename columns; add further old-name -> new-name pairs to the mapping as needed.
df = df.rename(columns={'报道时间': 'report_date'})

# Two ways to convert a column to datetime. pd.to_datetime parses strings and
# other inputs; astype needs the full dtype name 'datetime64[ns]' (the bare
# name 'datetime' is not a recognised dtype).
df['report_date'] = pd.to_datetime(df['report_date'])

df['report_date'] = df['report_date'].astype('datetime64[ns]')

# Store stock ids with the pandas string dtype.
df['stock_id'] = df['stock_id'].astype('string')

# Filter the rows of df.

# Boolean mask: keep only the rows whose stock_id equals 921.
# (If stock_id has been cast to a string dtype, compare against '921' instead.)
df = df[df['stock_id'] == 921]

# Same filter written with a callable passed to .loc; the callable receives
# the frame and returns the boolean mask.
df = df.loc[lambda frame: frame['stock_id'] == 921]

# Calling reset_index() on a groupby result moves the group keys back into
# ordinary columns, so the result is a DataFrame again.
data1 = (
    sheet_test.groupby(['股票代码', '报道时间'])['daily_yield']
    .mean()
    .reset_index()
)

# Merge the '内容' lists within each group (summing lists concatenates them).
data1 = (
    dat.groupby(['股票代码', '报道时间'])['内容']
    .apply(sum)
    .reset_index()
)

# Left join of res with ret on stock code and report time.
sheet = res.merge(ret, on=['股票代码', '报道时间'], how='left')

# Tip: split a datetime column into its year, month and day components.
date_parts = df['date'].dt

years = date_parts.year
months = date_parts.month
days = date_parts.day

# Top 10 rows of every (B, D) group after sorting by A in descending order.
df2 = (
    df.sort_values(by='A', ascending=False)
    .groupby(['B', 'D'])
    .head(10)
)

# Add the within-group rank of A (dense ranking, largest value ranked first).
df['group_sortnum'] = df.groupby(['B', 'D'])['A'].rank(
    method='dense',
    ascending=False,
)

# Remove the rows whose 'daily_yield' is NaN, then reset the index
# (the previous index is kept as a regular column by reset_index()).
sheet = sheet[sheet['daily_yield'].notna()].reset_index()

# Join the words of each '内容' entry with '|' as the separator.
# Mapping over the single column avoids building a Series for every row, as
# DataFrame.apply(axis=1) does, and it also behaves sensibly on an empty frame.
dat1['内容'] = dat1['内容'].map('|'.join)

# Conditional (ternary) expression inside a lambda:
# missing values become 0, zeros stay as they are, everything else becomes 1.
sheet['daily_yield'] = (
    sheet['daily_yield']
    .fillna(0)
    .apply(lambda value: 1 if value != 0 else value)
)

# Turn a single column into a plain Python list.
word_list = df['word_list'].tolist()

# Generate a sequence of consecutive daily timestamps with pandas.
index = pd.date_range(start='6/1/2012', end='6/4/2012')

# Shift rows of a DataFrame / Series.
# A positive periods value (the default is 1) moves values towards later rows,
# a negative value such as -1 moves them towards earlier rows; axis defaults to 0.
df['return'] = df['return'].shift(-1)

# With freq the index itself is moved instead of the data, so this form only
# applies to a time-series (datetime-like) index. pd.Timedelta is used so the
# line works regardless of whether `datetime` names the module or the class.
df = df.shift(freq=pd.Timedelta(days=1))

#df获取某一段时间的数据

1.把date设置为索引,然后用切片

# Make 'date' the index, then select the period with a label slice
# (both end points are included).
df = df.set_index('date')
df = df.loc['2012-06-01':'2012-06-05']

# Alternative: keep the dates in a regular column and filter with a boolean
# mask covering the closed interval [2012-07-01, 2012-08-31].
data = data[
    data['Date'].between(pd.to_datetime('20120701'), pd.to_datetime('20120831'))
]

用字典(dict,即键值映射)构建一个dataframe

# Build a DataFrame from a dict that maps column names to column values.
date = pd.date_range(start='6/1/2012', end='6/10/2012')

np.random.seed(0)  # fixed seed so the random values are reproducible
stock_return = np.random.rand(len(date))

data = dict(date=date, stock_return=stock_return)

df = pd.DataFrame(data)

# Bare expression: displays the frame when run in a notebook / REPL.
df

参考阅读

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: