import seaborn as sns
import numpy as np
# Load the iris dataset and pull out the two petal measurements as arrays.
df = sns.load_dataset('iris')
x = df['petal_length'].values
y = df['petal_width'].values

# With NumPy: least-squares fit of a degree-1 polynomial (a straight line).
# The coefficients come back highest degree first: [slope, intercept].
np.polyfit(x, y, deg=1)
array([ 0.41575542, -0.36307552])
# Keep the fitted coefficients; np.polyfit returns them highest degree first,
# so reg[0] is the slope and reg[1] is the intercept.
reg = np.polyfit(x, y, deg=1)
import matplotlib.pyplot as plt


def f(x):
    """Evaluate the fitted regression line at ``x`` (scalar or array)."""
    return reg[1] + reg[0] * x


# Scatter the raw observations and overlay the fitted line in red.
plt.scatter(x, y)
plt.plot(x, f(x), color='red')
[<matplotlib.lines.Line2D at 0x1378fcc49d0>]
La autocorrelación es la correlación de la serie de tiempo con una copia retrasada de sí misma:
import yfinance as yf
# Download Apple's (ticker 'aapl') price history at a daily interval for the
# requested 2022 date window.
df = yf.Ticker('aapl').history(
    start='2022-01-01',
    end='2022-12-26',
    interval='1d',
)
# Preview the first rows of the downloaded frame.
df.head()
Open | High | Low | Close | Volume | Dividends | Stock Splits | |
---|---|---|---|---|---|---|---|
Date | |||||||
2022-01-03 | 176.803859 | 181.824722 | 176.684557 | 180.959732 | 104487900 | 0.0 | 0 |
2022-01-04 | 181.576172 | 181.884380 | 178.086416 | 178.663071 | 99310400 | 0.0 | 0 |
2022-01-05 | 178.573600 | 179.130366 | 173.632277 | 173.910660 | 94537600 | 0.0 | 0 |
2022-01-06 | 171.703450 | 174.288453 | 170.649569 | 171.007492 | 96904000 | 0.0 | 0 |
2022-01-07 | 171.892376 | 173.135163 | 170.043108 | 171.176529 | 86709100 | 0.0 | 0 |
# `pd` is used below but pandas is never imported in this file; import it here
# so the code also runs outside a session where pandas was already loaded.
import pandas as pd

# Make sure the index holds datetime values.
df.index = pd.to_datetime(df.index)
# Autocorrelation of the opening price with a lagged copy of itself
# (Series.autocorr uses a lag of 1 by default).
df['Open'].autocorr()
0.9611366874532178
Estacionariedad: la distribución conjunta de las observaciones no depende del tiempo. Estacionariedad débil: la media, la varianza y las autocorrelaciones de las observaciones no dependen del tiempo. Los procesos no estacionarios se vuelven difíciles de modelar. Las series estacionales (que repiten patrones en cierto período de tiempo) son no estacionarias.
# The first reset moves the Date index into a regular `Date` column and leaves
# a plain integer index behind.
df = df.reset_index()
# The second reset copies that integer index into an explicit `index` column.
df = df.reset_index()
df
index | Date | Open | High | Low | Close | Volume | Dividends | Stock Splits | |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | 2022-01-03 | 176.803859 | 181.824722 | 176.684557 | 180.959732 | 104487900 | 0.0 | 0 |
1 | 1 | 2022-01-04 | 181.576172 | 181.884380 | 178.086416 | 178.663071 | 99310400 | 0.0 | 0 |
2 | 2 | 2022-01-05 | 178.573600 | 179.130366 | 173.632277 | 173.910660 | 94537600 | 0.0 | 0 |
3 | 3 | 2022-01-06 | 171.703450 | 174.288453 | 170.649569 | 171.007492 | 96904000 | 0.0 | 0 |
4 | 4 | 2022-01-07 | 171.892376 | 173.135163 | 170.043108 | 171.176529 | 86709100 | 0.0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
242 | 242 | 2022-12-19 | 135.110001 | 135.199997 | 131.320007 | 132.369995 | 79592600 | 0.0 | 0 |
243 | 243 | 2022-12-20 | 131.389999 | 133.250000 | 129.889999 | 132.300003 | 77432800 | 0.0 | 0 |
244 | 244 | 2022-12-21 | 132.979996 | 136.809998 | 132.750000 | 135.449997 | 85928000 | 0.0 | 0 |
245 | 245 | 2022-12-22 | 134.350006 | 134.559998 | 130.300003 | 132.229996 | 77852100 | 0.0 | 0 |
246 | 246 | 2022-12-23 | 130.919998 | 132.419998 | 129.639999 | 131.860001 | 63771000 | 0.0 | 0 |
247 rows × 9 columns
# Plot the first difference of the opening price (each value minus the
# previous one).
open_diff = df['Open'].diff()
plt.plot(open_diff)
[<matplotlib.lines.Line2D at 0x1379904fbe0>]
# Plot the raw (undifferenced) opening-price series.
open_prices = df['Open']
plt.plot(open_prices)
[<matplotlib.lines.Line2D at 0x137990a5cd0>]