In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.pyplot as pl
import scipy.stats as st
plt.style.use('ggplot')

import glob
import pandas as pd
In [12]:
parsed_data = map(lambda path: pd.read_csv(path, sep='\t'), 
                  glob.glob('/home/ben/Downloads/complete_flights_O1-O6/*.txt'))
parsed_data = list(parsed_data)
In [60]:
for df in parsed_data:
    plt.plot(df.x, df.y, alpha=.25, linestyle='--')
    plt.scatter(df.x, df.y)
    plt.show()
In [139]:
df = pd.concat(parsed_data, axis=0)
In [140]:
df.head()
Out[140]:
frame time x y value filename filedate
0 7712 7712 233.543 427.179 106 20100831_p1_7713.BMP 2010-08-31 14:18:24
1 7715 7715 238.346 429.727 65 20100831_p1_7716.BMP 2010-08-31 14:18:27
2 7720 7720 236.898 426.009 127 20100831_p1_7721.BMP 2010-08-31 14:18:32
3 7726 7726 229.925 417.881 77 20100831_p1_7727.BMP 2010-08-31 14:18:38
4 7731 7731 230.421 424.044 118 20100831_p1_7732.BMP 2010-08-31 14:18:43
In [150]:
sns.jointplot(df.x, df.y, size=16, joint_kws={'alpha': .25})
Out[150]:
<seaborn.axisgrid.JointGrid at 0x7f32ad767860>
In [137]:
plt.figure(figsize=(16, 16))
for df in parsed_data:
    plt.plot(df.x, df.y, alpha=.25, linestyle='--')
    plt.scatter(df.x, df.y, alpha=.75)
plt.show()
lowx, highx = np.percentile(df.x, 15), np.percentile(df.x, 85) lowy, highy = np.percentile(df.y, 15), np.percentile(df.y, 85) df_filtered = df[np.logical_and(np.logical_and(df.x > lowx, df.x < highx), np.logical_and(df.y > lowy, df.y < highy))]
In [142]:
#data = np.random.multivariate_normal((0, 0), [[0.8, 0.05], [0.05, 0.7]], 100)
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Peform the kernel density estimate
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .75)
f = np.reshape(kernel(positions).T, xx.shape)

f = f / np.max(f)
#f = np.log(f)# + 100 * (f / np.max(f))

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
# Contourf plot
cfset = ax.contourf(xx, yy, f, cmap='Blues')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour plot
cset = ax.contour(xx, yy, f, colors='k')
# Label plot
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

ax.scatter(df.x, df.y, alpha=.1)

pl.show()
In [143]:
## data = np.random.multivariate_normal((0, 0), [[0.8, 0.05], [0.05, 0.7]], 100)
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Peform the kernel density estimate
xx, yy = np.mgrid[xmin:xmax:200j, ymin:ymax:200j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .005)
f = np.reshape(kernel(positions).T, xx.shape)

f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
# Contourf plot
cfset = ax.contourf(xx, yy, f, cmap='Blues')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour plot
#cset = ax.contour(xx, yy, f, colors='k')
# Label plot
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

#ax.scatter(df.x, df.y, alpha=.1)

pl.show()
/usr/lib/python3.6/site-packages/ipykernel/__main__.py:14: RuntimeWarning: divide by zero encountered in log
In [144]:
## data = np.random.multivariate_normal((0, 0), [[0.8, 0.05], [0.05, 0.7]], 100)
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Peform the kernel density estimate
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .5)
f = np.reshape(kernel(positions).T, xx.shape)

f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
# Contourf plot
cfset = ax.contourf(xx, yy, f, cmap='Blues')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour plot
cset = ax.contour(xx, yy, f, colors='k')
# Label plot
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

ax.scatter(df.x, df.y, alpha=.1)

pl.show()
In [145]:
## data = np.random.multivariate_normal((0, 0), [[0.8, 0.05], [0.05, 0.7]], 100)
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Peform the kernel density estimate
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .01)
f = np.reshape(kernel(positions).T, xx.shape)

f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
# Contourf plot
cfset = ax.contourf(xx, yy, f, cmap='Blues')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour plot
cset = ax.contour(xx, yy, f, colors='k')
# Label plot
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

ax.scatter(df.x, df.y, alpha=.1)

pl.show()
/usr/lib/python3.6/site-packages/ipykernel/__main__.py:14: RuntimeWarning: divide by zero encountered in log
In [146]:
## data = np.random.multivariate_normal((0, 0), [[0.8, 0.05], [0.05, 0.7]], 100)
x = df.x
y = df.y
xmin, xmax = df.x.min(), df.x.max()
ymin, ymax = df.y.min(), df.y.max()

# Peform the kernel density estimate
xx, yy = np.mgrid[xmin:xmax:250j, ymin:ymax:250j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values, .01)
f = np.reshape(kernel(positions).T, xx.shape)

f = np.log(f)

fig = pl.figure(figsize=(16, 16))
ax = fig.gca()

ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
# Contourf plot
cfset = ax.contourf(xx, yy, f, 255, cmap='Blues')
## Or kernel density estimate plot instead of the contourf plot
#ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
# Contour plot
#cset = ax.contour(xx, yy, f, colors='k')
# Label plot
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Y1')
ax.set_ylabel('Y0')

ax.scatter(df.x, df.y, alpha=.05)

pl.show()
/usr/lib/python3.6/site-packages/ipykernel/__main__.py:14: RuntimeWarning: divide by zero encountered in log