Pandas Session 9 code [Final]

Pandas Session 9 Code For Video Click Fahad Hussain CS


Hi, welcome to the Fahad Hussain CS blog. Do not forget to subscribe to and follow the blog and the YouTube channel, Fahad Hussain CS.

Code:

import numpy as np

import pandas as pd


# Build a Series with repeated labels, then show its distinct values and how
# often each one occurs.
values = pd.Series(['apple', 'orange', 'apple',
                    'apple'] * 2)
print(values)
print(pd.unique(values))
# Series.value_counts() is used here because the top-level pd.value_counts()
# helper is deprecated in recent pandas releases.
print(values.value_counts())

# Dimension-table representation: `values` now holds integer codes and `dim`
# holds the distinct labels those codes point at.
values = pd.Series([0, 1, 0, 0] * 2)
dim = pd.Series(['apple', 'orange'])
print(values)
print(dim)

# take() looks up each code in `dim` by position, rebuilding the label
# sequence. The result is not stored (notebook-style display expression).
dim.take(values)


"""### Categorical Type in pandas"""


# Sample basket data: eight rows cycling through two fruit names.
fruits = 2 * ['apple', 'orange', 'apple', 'apple']
N = len(fruits)

# 'count' is drawn before 'weight', so the random generator is consumed in
# the same order as the dict keys are listed.
df = pd.DataFrame(
    {
        'fruit': fruits,
        'basket_id': np.arange(N),
        'count': np.random.randint(3, 15, size=N),
        'weight': np.random.uniform(0, 4, size=N),
    },
    columns=['basket_id', 'fruit', 'count', 'weight'],
)
df

# astype('category') casts an existing column to the categorical dtype.
fruit_cat = df.fruit.astype('category')
fruit_cat

# The values of a categorical Series expose the distinct categories and the
# integer code assigned to each row.
c = fruit_cat.values
print(c)
print(type(c))

print(c.categories)
print(c.codes)

# Store the categorical version back into the DataFrame.
df['fruit'] = df.fruit.astype('category')
df['fruit']

# A Categorical can be built directly from a sequence of labels ...
my_categories = pd.Categorical(['foo', 'bar', 'baz', 'foo', 'bar'])
my_categories

# ... or from integer codes plus the list of categories they index into.
categories = ['foo', 'bar', 'baz']
codes = [0, 1, 2, 0, 0, 1]
my_cats_2 = pd.Categorical.from_codes(codes, categories=categories)
my_cats_2

# ordered=True marks the categories as having a meaningful order.
ordered_cat = pd.Categorical.from_codes(codes, categories=categories,
                                        ordered=True)
ordered_cat

# An unordered categorical can also be converted afterwards.
my_cats_2.as_ordered()


"""### Computations with Categoricals"""


# Seeding NumPy's global random generator makes the draws reproducible: the
# same seed always yields the same numbers, whereas without re-seeding every
# run produces different ones.
np.random.seed(12345)
draws = np.random.randn(1000)
print(draws)

# qcut splits the data into 4 quantile-based bins and returns a Categorical.
bins = pd.qcut(draws, 4)
bins

# Same binning, but with readable quartile labels instead of intervals.
bins = pd.qcut(draws, 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])
print(bins)
print(bins.codes[:10])

# Group the raw draws by quartile and summarize each group.
# observed=False is passed explicitly: grouping by a categorical without it
# emits a FutureWarning in recent pandas and relies on a default that differs
# between versions. It keeps every declared category in the result.
bins = pd.Series(bins, name='quartile')
results = (pd.Series(draws)
           .groupby(bins, observed=False)
           .agg(['count', 'min', 'max'])
           .reset_index())
results

# The 'quartile' column of the summary is looked up here for display.
results['quartile']


"""#### Better performance with categoricals"""


# Ten million rows: large enough for the memory difference between plain
# string labels and categoricals to be clearly visible.
N = 10000000
draws = pd.Series(np.random.randn(N))
labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4))

print(draws)
print(labels)

categories = labels.astype('category')
print(categories)

# deep=True makes pandas introspect object-dtype data for its real memory
# consumption. Without it, an object column is sized without the strings it
# refers to, which understates how much memory the categorical version saves.
print(labels.memory_usage(deep=True))
print(categories.memory_usage(deep=True))

# Commented out IPython magic to ensure Python compatibility.
# %time _ = labels.astype('category')


"""### Categorical Methods"""


# Eight single-letter labels, converted to a categorical Series.
s = pd.Series(list('abcd') * 2)
cat_s = s.astype('category')
cat_s

# The .cat accessor exposes the integer codes and the category index.
print(cat_s.cat.codes)
print(cat_s.cat.categories)

# Declare a wider set of categories than the data actually contains.
actual_categories = list('abcde')
cat_s2 = cat_s.cat.set_categories(actual_categories)
cat_s2

# Print the per-category counts before and after widening the category set.
print(cat_s.value_counts())
print(cat_s2.value_counts())

# Keep only the 'a' and 'b' rows, then print the subset before and after
# removing the categories that no longer occur in it.
cat_s3 = cat_s.loc[cat_s.isin(['a', 'b'])]
print(cat_s3)
print(cat_s3.cat.remove_unused_categories())


"""#### Creating dummy variables for modeling"""


# Categorical Series built directly with dtype='category'.
cat_s = pd.Series(list('abcd') * 2, dtype='category')

# get_dummies expands the categorical into one indicator column per category.
pd.get_dummies(cat_s)


"""## Advanced GroupBy Use


### Group Transforms and "Unwrapped" GroupBys

"""


# Twelve float values labelled with a repeating a/b/c key.
df = pd.DataFrame({'key': list('abc') * 4,
                   'value': np.arange(12, dtype=float)})
df

# Group the 'value' column by 'key'.
g = df.groupby('key')['value']
print(g)
print(g.mean())

# transform() returns a result aligned with the original rows; here every row
# receives the mean of its own group.
g.transform(lambda grp: grp.mean())

# The same aggregation requested by name.
g.transform('mean')

# A transform may also return a same-sized result per group.
g.transform(lambda grp: 2 * grp)

# Rank the values within each group, largest first.
g.transform(lambda grp: grp.rank(ascending=False))


def normalize(x):
    """Return *x* centered on its mean and scaled by its standard deviation.

    The mean and standard deviation come from ``x.mean()`` and ``x.std()``,
    so *x* must provide both methods and support arithmetic with their
    results (for example a pandas Series).
    """
    centered = x - x.mean()
    return centered / x.std()




# Run normalize() on each group, once through transform() and once through
# apply(); neither result is stored.
g.transform(normalize)
g.apply(normalize)

# The same standardization written with the built-in 'mean' and 'std'
# aggregations instead of a Python function.
g.transform('mean')
normalized = df['value'].sub(g.transform('mean')).div(g.transform('std'))
normalized


"""### Grouped Time Resampling"""


# Fifteen timestamps spaced one minute apart, each paired with a counter.
N = 15
times = pd.date_range(start='2017-05-20 00:00', periods=N, freq='1min')
df = pd.DataFrame({'time': times,
                   'value': np.arange(N)})
df

# Index by time and count the rows falling into each five-minute bucket.
(df.set_index('time')
   .resample('5min')
   .count())

# Same timestamps with every one repeated three times, once per key a/b/c.
df2 = pd.DataFrame({'time': times.repeat(3),
                    'key': np.tile(['a', 'b', 'c'], N),
                    'value': np.arange(3 * N, dtype=float)})
df2.head(7)


"""## End of Pandas for now... I hope you enjoyed the entire journey through NumPy and Pandas

#See you in the next course

#Do subscribe and share

"""

No comments:

Post a Comment

Feel free to write your query in the comments. Your comments are always welcome.