/python/pandas_2.py
Python | 86 lines | 62 code | 17 blank | 7 comment | 0 complexity | 40a0b27f562e799a719771f753ee6244 MD5 | raw file
- import numpy as np
- import pandas as pd
# Four-element float64 vector 0, 1, 2, 3 with the third entry replaced by a
# missing value (NaN).
arr = np.array([0.0, 1.0, np.nan, 3.0], dtype='float64')
def sample_stdev(arr):
    """Return the standard deviation of ``arr`` after dropping missing samples.

    The data is wrapped in a ``DataFrame`` so that ``dropna`` can remove
    missing rows and pandas' ``std`` (which normalises by ``N - 1``) can be
    used.

    Args:
        arr: Data accepted by ``pd.DataFrame``; a 1-D array becomes a single
            column.

    Returns:
        A ``numpy.ndarray`` with one standard deviation per column (a
        length-1 array for 1-D input).
    """
    # convert to dataframe
    df = pd.DataFrame(arr)
    # drop na samples (any row containing a NaN is removed)
    df = df.dropna()
    # return std dev, just its values, not the full df;
    # the attribute is ``values`` -- a Series has no ``value`` attribute
    return df.std().values
def sample_stdev2(arr):
    """Return the standard deviation of the non-NaN entries of ``arr``.

    NumPy-only counterpart of ``sample_stdev``.

    Args:
        arr: Numeric array (or sequence convertible with ``np.asarray``).

    Returns:
        The standard deviation as computed by ``np.std`` with its default
        ``ddof=0``.  NOTE(review): this normalises by ``N`` whereas pandas'
        ``std`` normalises by ``N - 1``, so the two helpers do not return the
        same number -- confirm which convention is intended.
    """
    arr = np.asarray(arr)
    # Boolean masks must be inverted with ``~``; unary minus on a boolean
    # array raises TypeError in NumPy.
    valid = arr[~np.isnan(arr)]
    return np.std(valid)
# Three-point sample whose first observation is missing.
x = np.array((np.nan, 1, 2))
# Run both implementations on the same input; the results are not stored.
sample_stdev(x)
sample_stdev2(x)
- from pandas import Series, DataFrame
# Two equivalent ways to pull out the "GOOG" column.
# NOTE(review): ``returns`` is not defined in this file; presumably a
# DataFrame with one column per ticker -- confirm against the caller.
# ``.ix`` was removed from pandas; ``.loc`` is the label-based indexer.
goog = returns.loc[:, "GOOG"]
# Attribute access works when the column label is a valid identifier.
goog = returns.GOOG
# Build a labelled Series: data first, index labels second.
val = [1, 2, 3]
labels = ['A', 'B', 'C']
s = Series(val, labels)
# Label-based slice (``.ix`` was removed from pandas; use ``.loc``).
# Unlike positional slicing, both end labels are included.
s.loc['A':'B']
s.values
# series is one dimension of a DataFrame, like a vector
val = [np.nan, 1, 2]
s = Series(val, labels)
# pandas reductions skip NaN by default, so these use only 1 and 2
s.std()
s.mean()
s.max()
# let's make a DataFrame
val = [[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]]
rows = ['B', 'C', 'D']
cols = ['X', 'Y', 'Z']
# positional arguments are: data, index (row labels), columns
df = DataFrame(val, rows, cols)
# ``.ix`` was removed from pandas; ``.loc`` selects by row/column label
df.loc['C', 'Y']                    # single cell
df.loc[['C', 'B'], 'Y']             # several rows of one column
type(df.loc[['C', 'B'], 'Y'])       # that selection is a Series
df.loc[['C', 'B'], ['X', 'Y']]      # several rows and columns
df.index
df.columns
df.values
# Series indexed A, B, C (``labels`` is defined earlier in this script)
val = [1, 2, 3]
s = Series(val, labels)
# Column 'Y' of ``df`` as a Series indexed B, C, D.
# ``.ix`` was removed from pandas; ``.loc`` is the label-based indexer.
s2 = df.loc[:, 'Y']
# Arithmetic aligns on index labels; labels present on only one side
# ('A' and 'D' here) produce NaN.
s + s2
# ``fill_value=0`` substitutes 0 for the missing side instead of giving NaN
s.add(s2, fill_value=0)
# Reductions run down each column by default; ``axis=1`` runs across rows
df.sum()
df.sum(axis=1)
df.mean()
df.std()
def cond_mean(returns):
    """Average the first column conditional on the sign of the second.

    Per the original note: avg daily returns of GOOG when AAPL is UP, and
    also when AAPL is DOWN.

    Args:
        returns: DataFrame whose first column holds the series to average
            and whose second column holds the conditioning series.  Columns
            are taken by position, not by label.

    Returns:
        A ``Series`` indexed ``['UP', 'DOWN']``: the mean of the first column
        over rows where the second column is ``> 0``, and over all remaining
        rows (zero and NaN in the second column count as DOWN).
    """
    # ``.ix`` was removed from pandas; ``.iloc`` is the positional indexer.
    ret1 = returns.iloc[:, 0]
    ret2 = returns.iloc[:, 1]
    mask = ret2 > 0
    up1 = ret1[mask]
    # ``~`` is the boolean inverse; unary minus is not a logical NOT.
    down1 = ret1[~mask]
    return Series([up1.mean(), down1.mean()], ['UP', 'DOWN'])
-
# Apply cond_mean and check the kind of object it hands back.
# NOTE(review): ``returns`` is not defined in this file -- confirm where it
# is loaded.
rs = cond_mean(returns)
assert isinstance(rs, Series)
# Sketch of how such a result is assembled: two numbers labelled UP / DOWN.
# NOTE(review): ``number`` and ``other_number`` look like placeholders; they
# are not defined anywhere in this file.
val = [number, other_number]
Series(val, ['UP', 'DOWN'])