Advanced Python Pandas (Merging, Apply, Groupby, Pivot, Date)
Original Source: https://www.coursera.org/specializations/data-science-python
Merging Dataframes
import pandas as pd
import numpy as np
staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},
{'Name': 'Sally', 'Role': 'Course liaison'},
{'Name': 'James', 'Role': 'Grader'}])
staff_df = staff_df.set_index('Name')
staff_df.head()
| Name | Role |
|---|---|
| Kelly | Director of HR |
| Sally | Course liaison |
| James | Grader |
student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},
{'Name': 'Mike', 'School': 'Law'},
{'Name': 'Sally', 'School': 'Engineering'}])
student_df = student_df.set_index('Name')
student_df.head()
| Name | School |
|---|---|
| James | Business |
| Mike | Law |
| Sally | Engineering |
pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)
| Name | Role | School |
|---|---|---|
| James | Grader | Business |
| Kelly | Director of HR | NaN |
| Mike | NaN | Law |
| Sally | Course liaison | Engineering |
pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)
| Name | Role | School |
|---|---|---|
| Sally | Course liaison | Engineering |
| James | Grader | Business |
pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)
| Name | Role | School |
|---|---|---|
| Kelly | Director of HR | NaN |
| Sally | Course liaison | Engineering |
| James | Grader | Business |
pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)
| Name | Role | School |
|---|---|---|
| James | Grader | Business |
| Mike | NaN | Law |
| Sally | Course liaison | Engineering |
staff_df = staff_df.reset_index()
student_df = student_df.reset_index()
pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')
| | Name | Role | School |
|---|---|---|---|
| 0 | Kelly | Director of HR | NaN |
| 1 | Sally | Course liaison | Engineering |
| 2 | James | Grader | Business |
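When the key column has the same name in both frames, the on parameter is a shorthand for left_on/right_on, and merge is also available as a DataFrame method. A minimal sketch of both equivalent forms:
# 'on' covers both left_on and right_on when the key name matches
pd.merge(staff_df, student_df, how='left', on='Name')
# the method form reads left to right and gives the same result
staff_df.merge(student_df, how='left', on='Name')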
staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR', 'Location': 'State Street'},
{'Name': 'Sally', 'Role': 'Course liaison', 'Location': 'Washington Avenue'},
{'Name': 'James', 'Role': 'Grader', 'Location': 'Washington Avenue'}])
student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business', 'Location': '1024 Billiard Avenue'},
{'Name': 'Mike', 'School': 'Law', 'Location': 'Fraternity House #22'},
{'Name': 'Sally', 'School': 'Engineering', 'Location': '512 Wilson Crescent'}])
pd.merge(staff_df, student_df, how='left', left_on='Name', right_on='Name')
| | Location_x | Name | Role | Location_y | School |
|---|---|---|---|---|---|
| 0 | State Street | Kelly | Director of HR | NaN | NaN |
| 1 | Washington Avenue | Sally | Course liaison | 512 Wilson Crescent | Engineering |
| 2 | Washington Avenue | James | Grader | 1024 Billiard Avenue | Business |
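The Location_x and Location_y names come from merge's suffixes parameter, which defaults to ('_x', '_y'). It can be set to something more descriptive; a small sketch:
# rename the overlapping Location columns as they are merged
pd.merge(staff_df, student_df, how='left', on='Name', suffixes=('_staff', '_student'))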
staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},
{'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liaison'},
{'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])
student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},
{'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},
{'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])
pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])
| | First Name | Last Name | Role | School |
|---|---|---|---|---|
| 0 | Sally | Brooks | Course liaison | Engineering |
Apply
df = pd.read_csv('census.csv')
df.head()
| | SUMLEV | REGION | DIVISION | STATE | COUNTY | STNAME | CTYNAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | ... | RDOMESTICMIG2011 | RDOMESTICMIG2012 | RDOMESTICMIG2013 | RDOMESTICMIG2014 | RDOMESTICMIG2015 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40 | 3 | 6 | 1 | 0 | Alabama | Alabama | 4779736 | 4780127 | 4785161 | ... | 0.002295 | -0.193196 | 0.381066 | 0.582002 | -0.467369 | 1.030015 | 0.826644 | 1.383282 | 1.724718 | 0.712594 |
| 1 | 50 | 3 | 6 | 1 | 1 | Alabama | Autauga County | 54571 | 54571 | 54660 | ... | 7.242091 | -2.915927 | -3.012349 | 2.265971 | -2.530799 | 7.606016 | -2.626146 | -2.722002 | 2.592270 | -2.187333 |
| 2 | 50 | 3 | 6 | 1 | 3 | Alabama | Baldwin County | 182265 | 182265 | 183193 | ... | 14.832960 | 17.647293 | 21.845705 | 19.243287 | 17.197872 | 15.844176 | 18.559627 | 22.727626 | 20.317142 | 18.293499 |
| 3 | 50 | 3 | 6 | 1 | 5 | Alabama | Barbour County | 27457 | 27457 | 27341 | ... | -4.728132 | -2.500690 | -7.056824 | -3.904217 | -10.543299 | -4.874741 | -2.758113 | -7.167664 | -3.978583 | -10.543299 |
| 4 | 50 | 3 | 6 | 1 | 7 | Alabama | Bibb County | 22915 | 22919 | 22861 | ... | -5.527043 | -5.068871 | -6.201001 | -0.177537 | 0.177258 | -5.088389 | -4.363636 | -5.403729 | 0.754533 | 1.107861 |
5 rows × 100 columns
df = df[df['SUMLEV']==50]
df.set_index(['STNAME','CTYNAME'], inplace=True)
df.head()
| STNAME | CTYNAME | SUMLEV | REGION | DIVISION | STATE | COUNTY | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | POPESTIMATE2011 | POPESTIMATE2012 | ... | RDOMESTICMIG2011 | RDOMESTICMIG2012 | RDOMESTICMIG2013 | RDOMESTICMIG2014 | RDOMESTICMIG2015 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Alabama | Autauga County | 50 | 3 | 6 | 1 | 1 | 54571 | 54571 | 54660 | 55253 | 55175 | ... | 7.242091 | -2.915927 | -3.012349 | 2.265971 | -2.530799 | 7.606016 | -2.626146 | -2.722002 | 2.592270 | -2.187333 |
| Alabama | Baldwin County | 50 | 3 | 6 | 1 | 3 | 182265 | 182265 | 183193 | 186659 | 190396 | ... | 14.832960 | 17.647293 | 21.845705 | 19.243287 | 17.197872 | 15.844176 | 18.559627 | 22.727626 | 20.317142 | 18.293499 |
| Alabama | Barbour County | 50 | 3 | 6 | 1 | 5 | 27457 | 27457 | 27341 | 27226 | 27159 | ... | -4.728132 | -2.500690 | -7.056824 | -3.904217 | -10.543299 | -4.874741 | -2.758113 | -7.167664 | -3.978583 | -10.543299 |
| Alabama | Bibb County | 50 | 3 | 6 | 1 | 7 | 22915 | 22919 | 22861 | 22733 | 22642 | ... | -5.527043 | -5.068871 | -6.201001 | -0.177537 | 0.177258 | -5.088389 | -4.363636 | -5.403729 | 0.754533 | 1.107861 |
| Alabama | Blount County | 50 | 3 | 6 | 1 | 9 | 57322 | 57322 | 57373 | 57711 | 57776 | ... | 1.807375 | -1.177622 | -1.748766 | -2.062535 | -1.369970 | 1.859511 | -0.848580 | -1.402476 | -1.577232 | -0.884411 |
5 rows × 98 columns
# find the min and max of the following population-estimate columns for each row (each county).
rows = ['POPESTIMATE2010',
'POPESTIMATE2011',
'POPESTIMATE2012',
'POPESTIMATE2013',
'POPESTIMATE2014',
'POPESTIMATE2015']
result = df.apply(lambda x: pd.Series({'min': x[rows].min(), 'max': x[rows].max()}), axis=1)
result.head()
| STNAME | CTYNAME | min | max |
|---|---|---|---|
| Alabama | Autauga County | 54660.0 | 55347.0 |
| Alabama | Baldwin County | 183193.0 | 203709.0 |
| Alabama | Barbour County | 26489.0 | 27341.0 |
| Alabama | Bibb County | 22512.0 | 22861.0 |
| Alabama | Blount County | 57373.0 | 57776.0 |
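Row-wise apply is flexible but slow on large frames; the same result can be computed with vectorized column reductions. A sketch using the rows list defined above:
# vectorized equivalent of the apply() above: reduce across the selected columns
result = pd.DataFrame({'min': df[rows].min(axis=1), 'max': df[rows].max(axis=1)})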
Group by
df = pd.read_csv('census.csv')
df = df[df['SUMLEV']==50]
df.head()
| | SUMLEV | REGION | DIVISION | STATE | COUNTY | STNAME | CTYNAME | CENSUS2010POP | ESTIMATESBASE2010 | POPESTIMATE2010 | ... | RDOMESTICMIG2011 | RDOMESTICMIG2012 | RDOMESTICMIG2013 | RDOMESTICMIG2014 | RDOMESTICMIG2015 | RNETMIG2011 | RNETMIG2012 | RNETMIG2013 | RNETMIG2014 | RNETMIG2015 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 50 | 3 | 6 | 1 | 1 | Alabama | Autauga County | 54571 | 54571 | 54660 | ... | 7.242091 | -2.915927 | -3.012349 | 2.265971 | -2.530799 | 7.606016 | -2.626146 | -2.722002 | 2.592270 | -2.187333 |
| 2 | 50 | 3 | 6 | 1 | 3 | Alabama | Baldwin County | 182265 | 182265 | 183193 | ... | 14.832960 | 17.647293 | 21.845705 | 19.243287 | 17.197872 | 15.844176 | 18.559627 | 22.727626 | 20.317142 | 18.293499 |
| 3 | 50 | 3 | 6 | 1 | 5 | Alabama | Barbour County | 27457 | 27457 | 27341 | ... | -4.728132 | -2.500690 | -7.056824 | -3.904217 | -10.543299 | -4.874741 | -2.758113 | -7.167664 | -3.978583 | -10.543299 |
| 4 | 50 | 3 | 6 | 1 | 7 | Alabama | Bibb County | 22915 | 22919 | 22861 | ... | -5.527043 | -5.068871 | -6.201001 | -0.177537 | 0.177258 | -5.088389 | -4.363636 | -5.403729 | 0.754533 | 1.107861 |
| 5 | 50 | 3 | 6 | 1 | 9 | Alabama | Blount County | 57322 | 57322 | 57373 | ... | 1.807375 | -1.177622 | -1.748766 | -2.062535 | -1.369970 | 1.859511 | -0.848580 | -1.402476 | -1.577232 | -0.884411 |
5 rows × 100 columns
average_census2010pop = df.groupby('STNAME').agg({'CENSUS2010POP': 'mean'})
average_census2010pop.head()
| STNAME | CENSUS2010POP |
|---|---|
| Alabama | 71339.343284 |
| Alaska | 24490.724138 |
| Arizona | 426134.466667 |
| Arkansas | 38878.906667 |
| California | 642309.586207 |
avg_sum_census2010pop = (df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg(avg='mean', sum='sum'))
avg_sum_census2010pop.head()
| STNAME | avg | sum |
|---|---|---|
| Alabama | 71339.343284 | 4779736 |
| Alaska | 24490.724138 | 710231 |
| Arizona | 426134.466667 | 6392017 |
| Arkansas | 38878.906667 | 2915918 |
| California | 642309.586207 | 37253956 |
avg_sum_popestimate = (df.set_index('STNAME').groupby(level=0)[['POPESTIMATE2010','POPESTIMATE2011']].agg(['mean','sum']))
avg_sum_popestimate.head()
| STNAME | POPESTIMATE2010 mean | POPESTIMATE2010 sum | POPESTIMATE2011 mean | POPESTIMATE2011 sum |
|---|---|---|---|---|
| Alabama | 71420.313433 | 4785161 | 71658.328358 | 4801108 |
| Alaska | 24621.413793 | 714021 | 24921.379310 | 722720 |
| Arizona | 427213.866667 | 6408208 | 431248.800000 | 6468732 |
| Arkansas | 38965.253333 | 2922394 | 39180.506667 | 2938538 |
| California | 643691.017241 | 37334079 | 650000.586207 | 37700034 |
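A groupby object is also iterable, yielding (group key, sub-frame) pairs, which makes it easy to see what agg is doing under the hood. A minimal sketch:
# each iteration yields the group key and the rows belonging to that group
for state, frame in df.groupby('STNAME'):
    avg = frame['CENSUS2010POP'].mean()
    print('Counties in ' + state + ' have an average population of ' + str(avg))
    break  # show only the first group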
Scales
df = pd.DataFrame(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D'],
index=['excellent', 'excellent', 'excellent', 'good', 'good', 'good', 'ok', 'ok', 'ok', 'poor', 'poor'])
df.rename(columns={0: 'Grades'}, inplace=True)
df['Grades'].head()
excellent A+
excellent A
excellent A-
good B+
good B
Name: Grades, dtype: object
df['Grades'].astype('category').head()
excellent A+
excellent A
excellent A-
good B+
good B
Name: Grades, dtype: category
Categories (11, object): [A, A+, A-, B, ..., C+, C-, D, D+]
grades = df['Grades'].astype(pd.CategoricalDtype(
                     categories=['D', 'D+', 'C-', 'C', 'C+', 'B-', 'B', 'B+', 'A-', 'A', 'A+'],
                     ordered=True))
grades.head()
excellent A+
excellent A
excellent A-
good B+
good B
Name: Grades, dtype: category
Categories (11, object): [D < D+ < C- < C ... B+ < A- < A < A+]
grades > 'C'
excellent True
excellent True
excellent True
good True
good True
good True
ok True
ok False
ok False
poor False
poor False
Name: Grades, dtype: bool
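Because the categorical is ordered, sorting and min/max follow the declared grade order rather than plain string order. A quick sketch:
# sorting an ordered categorical follows the category order, so D sorts first
grades.sort_values().head()
# min() and max() return the lowest and highest grade present
grades.min()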
df = pd.read_csv('census.csv')
df = df[df['SUMLEV']==50]
df = df.set_index('STNAME').groupby(level=0)['CENSUS2010POP'].agg(avg='mean')
scaled_df = pd.cut(df['avg'], 10)
scaled_df.head()
STNAME
Alabama (11706.087, 75333.413]
Alaska (11706.087, 75333.413]
Arizona (390320.176, 453317.529]
Arkansas (11706.087, 75333.413]
California (579312.234, 642309.586]
Name: avg, dtype: category
Categories (10, interval[float64]): [(11706.087, 75333.413] < (75333.413, 138330.766] < (138330.766, 201328.118] < (201328.118, 264325.471] ... (390320.176, 453317.529] < (453317.529, 516314.881] < (516314.881, 579312.234] < (579312.234, 642309.586]]
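pd.cut also accepts a labels argument, which names the bins instead of displaying interval edges. A small sketch on the same column:
# replace the interval labels with simple bin names b1..b10
pd.cut(df['avg'], 10, labels=['b' + str(i) for i in range(1, 11)]).head()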
Pivot Tables
df = pd.read_csv('cars.csv')
df.head()
| | YEAR | Make | Model | Size | (kW) | Unnamed: 5 | TYPE | CITY (kWh/100 km) | HWY (kWh/100 km) | COMB (kWh/100 km) | CITY (Le/100 km) | HWY (Le/100 km) | COMB (Le/100 km) | (g/km) | RATING | (km) | TIME (h) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2012 | MITSUBISHI | i-MiEV | SUBCOMPACT | 49 | A1 | B | 16.9 | 21.4 | 18.7 | 1.9 | 2.4 | 2.1 | 0 | NaN | 100 | 7 |
| 1 | 2012 | NISSAN | LEAF | MID-SIZE | 80 | A1 | B | 19.3 | 23.0 | 21.1 | 2.2 | 2.6 | 2.4 | 0 | NaN | 117 | 7 |
| 2 | 2013 | FORD | FOCUS ELECTRIC | COMPACT | 107 | A1 | B | 19.0 | 21.1 | 20.0 | 2.1 | 2.4 | 2.2 | 0 | NaN | 122 | 4 |
| 3 | 2013 | MITSUBISHI | i-MiEV | SUBCOMPACT | 49 | A1 | B | 16.9 | 21.4 | 18.7 | 1.9 | 2.4 | 2.1 | 0 | NaN | 100 | 7 |
| 4 | 2013 | NISSAN | LEAF | MID-SIZE | 80 | A1 | B | 19.3 | 23.0 | 21.1 | 2.2 | 2.6 | 2.4 | 0 | NaN | 117 | 7 |
df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=np.mean)
| YEAR | BMW | CHEVROLET | FORD | KIA | MITSUBISHI | NISSAN | SMART | TESLA |
|---|---|---|---|---|---|---|---|---|
| 2012 | NaN | NaN | NaN | NaN | 49.0 | 80.0 | NaN | NaN |
| 2013 | NaN | NaN | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 280.000000 |
| 2014 | NaN | 104.0 | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 268.333333 |
| 2015 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 320.666667 |
| 2016 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 409.700000 |
df.pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=[np.mean,np.min], margins=True)
| YEAR | mean BMW | mean CHEVROLET | mean FORD | mean KIA | mean MITSUBISHI | mean NISSAN | mean SMART | mean TESLA | mean All | amin BMW | amin CHEVROLET | amin FORD | amin KIA | amin MITSUBISHI | amin NISSAN | amin SMART | amin TESLA | amin All |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2012 | NaN | NaN | NaN | NaN | 49.0 | 80.0 | NaN | NaN | 64.500000 | NaN | NaN | NaN | NaN | 49.0 | 80.0 | NaN | NaN | 49 |
| 2013 | NaN | NaN | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 280.000000 | 158.444444 | NaN | NaN | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 270.0 | 35 |
| 2014 | NaN | 104.0 | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 268.333333 | 135.000000 | NaN | 104.0 | 107.0 | NaN | 49.0 | 80.0 | 35.0 | 225.0 | 35 |
| 2015 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 320.666667 | 181.428571 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 280.0 | 35 |
| 2016 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 409.700000 | 252.263158 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 283.0 | 35 |
| All | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 345.478261 | 190.622642 | 125.0 | 104.0 | 107.0 | 81.0 | 49.0 | 80.0 | 35.0 | 225.0 | 35 |
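A pivot table is essentially a groupby followed by an unstack; the first pivot above can be reproduced directly. A sketch:
# equivalent to pivot_table(values='(kW)', index='YEAR', columns='Make', aggfunc=np.mean)
df.groupby(['YEAR', 'Make'])['(kW)'].mean().unstack()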
Date Functionality in Pandas
Timestamp
pd.Timestamp('9/1/2016 10:05AM')
Timestamp('2016-09-01 10:05:00')
Period
pd.Period('1/2016')
Period('2016-01', 'M')
pd.Period('3/5/2016')
Period('2016-03-05', 'D')
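Arithmetic on a Period moves it by whole units of its frequency. Two quick sketches:
pd.Period('1/2016') + 5    # Period('2016-06', 'M'): five months later
pd.Period('3/5/2016') - 2  # Period('2016-03-03', 'D'): two days earlier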
DatetimeIndex
t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])
t1
2016-09-01 a
2016-09-02 b
2016-09-03 c
dtype: object
type(t1.index)
pandas.core.indexes.datetimes.DatetimeIndex
PeriodIndex
t2 = pd.Series(list('def'), [pd.Period('2016-09'), pd.Period('2016-10'), pd.Period('2016-11')])
t2
2016-09 d
2016-10 e
2016-11 f
Freq: M, dtype: object
type(t2.index)
pandas.core.indexes.period.PeriodIndex
Converting to Datetime
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']
ts3 = pd.DataFrame(np.random.randint(10, 100, (4,2)), index=d1, columns=list('ab'))
ts3
| | a | b |
|---|---|---|
| 2 June 2013 | 94 | 71 |
| Aug 29, 2014 | 37 | 11 |
| 2015-06-26 | 76 | 76 |
| 7/12/16 | 97 | 33 |
ts3.index = pd.to_datetime(ts3.index)
ts3
| | a | b |
|---|---|---|
| 2013-06-02 | 94 | 71 |
| 2014-08-29 | 37 | 11 |
| 2015-06-26 | 76 | 76 |
| 2016-07-12 | 97 | 33 |
pd.to_datetime('4.7.12', dayfirst=True)
Timestamp('2012-07-04 00:00:00')
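When the string layout is known in advance, an explicit format argument removes the day/month ambiguity that dayfirst only hints at. A sketch:
# parse day.month.year explicitly instead of relying on inference
pd.to_datetime('4.7.12', format='%d.%m.%y')  # Timestamp('2012-07-04 00:00:00')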
Timedeltas
pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016')
Timedelta('2 days 00:00:00')
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D 3H')
Timestamp('2016-09-14 11:10:00')
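Timedeltas can also be built from keyword arguments, which is often clearer than the string form. A sketch of the same offset:
# keyword form of the '12D 3H' string above
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta(days=12, hours=3)  # Timestamp('2016-09-14 11:10:00')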
Working with Dates in a Dataframe
dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')
dates
DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',
'2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',
'2017-01-22'],
dtype='datetime64[ns]', freq='2W-SUN')
df = pd.DataFrame({'Count 1': 100 + np.random.randint(-5, 10, 9).cumsum(),
'Count 2': 120 + np.random.randint(-5, 10, 9)}, index=dates)
df
| | Count 1 | Count 2 |
|---|---|---|
| 2016-10-02 | 108 | 126 |
| 2016-10-16 | 112 | 115 |
| 2016-10-30 | 118 | 117 |
| 2016-11-13 | 113 | 128 |
| 2016-11-27 | 110 | 127 |
| 2016-12-11 | 109 | 125 |
| 2016-12-25 | 114 | 125 |
| 2017-01-08 | 112 | 127 |
| 2017-01-22 | 114 | 117 |
df.index.day_name()
Index(['Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday',
'Sunday', 'Sunday'],
dtype='object')
df.diff()
| | Count 1 | Count 2 |
|---|---|---|
| 2016-10-02 | NaN | NaN |
| 2016-10-16 | 4.0 | -11.0 |
| 2016-10-30 | 6.0 | 2.0 |
| 2016-11-13 | -5.0 | 11.0 |
| 2016-11-27 | -3.0 | -1.0 |
| 2016-12-11 | -1.0 | -2.0 |
| 2016-12-25 | 5.0 | 0.0 |
| 2017-01-08 | -2.0 | 2.0 |
| 2017-01-22 | 2.0 | -10.0 |
df.resample('M').mean()
| | Count 1 | Count 2 |
|---|---|---|
| 2016-10-31 | 112.666667 | 119.333333 |
| 2016-11-30 | 111.500000 | 127.500000 |
| 2016-12-31 | 111.500000 | 125.000000 |
| 2017-01-31 | 113.000000 | 122.000000 |
df.loc['2017']
| | Count 1 | Count 2 |
|---|---|---|
| 2017-01-08 | 112 | 127 |
| 2017-01-22 | 114 | 117 |
df.loc['2016-12']
| | Count 1 | Count 2 |
|---|---|---|
| 2016-12-11 | 109 | 125 |
| 2016-12-25 | 114 | 125 |
df.loc['2016-12':]
| | Count 1 | Count 2 |
|---|---|---|
| 2016-12-11 | 109 | 125 |
| 2016-12-25 | 114 | 125 |
| 2017-01-08 | 112 | 127 |
| 2017-01-22 | 114 | 117 |
df.asfreq('W', method='ffill')
| | Count 1 | Count 2 |
|---|---|---|
| 2016-10-02 | 108 | 126 |
| 2016-10-09 | 108 | 126 |
| 2016-10-16 | 112 | 115 |
| 2016-10-23 | 112 | 115 |
| 2016-10-30 | 118 | 117 |
| 2016-11-06 | 118 | 117 |
| 2016-11-13 | 113 | 128 |
| 2016-11-20 | 113 | 128 |
| 2016-11-27 | 110 | 127 |
| 2016-12-04 | 110 | 127 |
| 2016-12-11 | 109 | 125 |
| 2016-12-18 | 109 | 125 |
| 2016-12-25 | 114 | 125 |
| 2017-01-01 | 114 | 125 |
| 2017-01-08 | 112 | 127 |
| 2017-01-15 | 112 | 127 |
| 2017-01-22 | 114 | 117 |
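asfreq only changes the index frequency and fills the new rows; resample with a fill method gives the same weekly upsampling and also supports other reductions. A sketch:
# forward-filled weekly upsampling, equivalent to the asfreq call above
df.resample('W').ffill()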