import requests
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import datetime
import lxml


# Load the used-vehicle listings from the local CSV file into a DataFrame.
df_used = pd.read_csv('vehicles.csv')
# Bare expression: shows the frame when run as a notebook cell (no effect in a plain script).
df_used


# Scrape the monthly "sales figures by model" tables for 2019-2021 from goodcarbadcar.net.

# Request headers sent with every page fetch.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"
}


def _fetch_sales_table(url, table_index):
    """Download *url* and return one of its HTML tables as a DataFrame.

    Only tables whose text matches "Model" are considered; ``table_index``
    selects which of those matches to return.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within 30 seconds.
        ValueError: if pandas finds no matching table in the page.
        IndexError: if fewer than ``table_index + 1`` tables match.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    r = requests.get(url, headers=header, timeout=30)
    # Fail loudly on an error page instead of trying to parse it as data.
    r.raise_for_status()
    tables = pd.read_html(r.text, match="Model")
    return tables[table_index]


# NOTE(review): the 2020 page takes the first matching table while 2019 and 2021
# take the second. The indices are kept exactly as they were; presumably they
# reflect each page's layout - confirm against the live pages.
df2020 = _fetch_sales_table("https://www.goodcarbadcar.net/2020-us-vehicle-sales-figures-by-model/", 0)
df2019 = _fetch_sales_table("https://www.goodcarbadcar.net/2019-us-vehicle-sales-figures-by-model/", 1)
df2021 = _fetch_sales_table("https://www.goodcarbadcar.net/2021-us-vehicle-sales-figures-by-model/", 1)


# Vehicles Data:

# Drop the first 30 rows (positions 0-29), which are treated as junk.
dfu = df_used.iloc[30:]

# Keep only the columns needed for the analysis and rename manufacturer/model
# to Make/Model so they line up with the sales data.
dfu = dfu[['price','year','manufacturer','model','condition','fuel','transmission','type','state','posting_date']]
dfu = dfu.rename(columns={'manufacturer': 'Make', 'model': 'Model'})

# Turn blank / whitespace-only cells into NaN. replace() returns a new frame,
# so the result must be assigned back - otherwise the call has no effect.
dfu = dfu.replace(r'^\s*$', np.nan, regex=True)

# Remove rows that are missing make, model, price, or year.
# Note: rows with a missing condition are kept on purpose. These are probably
#       cars in poor or average condition (bias), because sellers are more
#       likely to state the condition when it is good or excellent.
dfu = dfu.dropna(subset=['Make', 'Model', 'price', 'year'])
dfu.head()


# Parse each posting_date string into a pandas Timestamp so the values can be
# compared with one another (element-by-element conversion: long execution time)
dfu['posting_date'] = dfu['posting_date'].map(pd.to_datetime)

# Spot-check one converted value and its type
sample_stamp = dfu['posting_date'][200000]
print(sample_stamp)
print(type(sample_stamp))

2021-04-04 21:54:57-04:00
<class 'pandas._libs.tslibs.timestamps.Timestamp'>


# Add a supertype column that groups the listing's body type into
# SUV / CAR / TRUCK / OTHER. Any type that is missing or not listed here
# ends up as NaN, because Series.map yields NaN for unmapped values.
# (The previous `dfu["type"] == np.nan` line never matched anything: NaN is
# not equal to itself, so that comparison is always False.)
SUPERTYPE_BY_TYPE = {
    # SUV - SUV, minivan, wagon
    "SUV": "SUV",
    "mini-van": "SUV",
    "wagon": "SUV",
    # CAR - sedan, hatchback, coupe, convertible
    "sedan": "CAR",
    "coupe": "CAR",
    "hatchback": "CAR",
    "convertible": "CAR",
    # TRUCK - truck, pickup
    "truck": "TRUCK",
    "pickup": "TRUCK",
    # OTHER - other, offroad, van, bus
    "other": "OTHER",
    "offroad": "OTHER",
    "van": "OTHER",
    "bus": "OTHER",
}
dfu["supertype"] = dfu["type"].map(SUPERTYPE_BY_TYPE)
dfu.head()


# Discard implausible listings: model year 2022 or later, or a price of
# 1,000,000 or more. Also discard listings too old to say much about current
# pricing (model year 1990 or earlier) and prices of 500 or less.
plausible = (
    (dfu['year'] < 2022)
    & (dfu['year'] > 1990)
    & (dfu['price'] < 1000000)
    & (dfu['price'] > 500)
)
dfu = dfu[plausible]

# Trim the remaining extremes in price and year. Each cut-off is computed on
# the frame left over from the previous step, so the order matters.
price_ceiling = dfu['price'].quantile(0.99)
dfu = dfu[dfu['price'] < price_ceiling]
price_floor = dfu['price'].quantile(0.01)
dfu = dfu[dfu['price'] > price_floor]
year_floor = dfu['year'].quantile(0.01)
dfu = dfu[dfu['year'] > year_floor]


# Sales Data:

MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# The last 2 rows of each table are the sales total and NaNs, so drop them.
# .copy() makes each result an independent frame: the column assignments below
# would otherwise be writing into a slice (SettingWithCopyWarning).
df2019 = df2019.iloc[:-2].copy()
df2020 = df2020.iloc[:-2].copy()
df2021 = df2021.iloc[:-2]

# Keep only the first 10 columns of the 2021 table (Model plus Jan-Sep); the
# Oct-Dec columns were still being uploaded.
df2021 = df2021[df2021.columns[:10]].copy()

# Convert the monthly sales columns from objects to numbers.
# 2021 only has the first nine months.
for frame, month_names in ((df2019, MONTHS), (df2020, MONTHS), (df2021, MONTHS[:9])):
    for month_name in month_names:
        frame[month_name] = pd.to_numeric(frame[month_name])

# Add year columns for later differentiation
df2019['year'] = 2019
df2020['year'] = 2020
df2021['year'] = 2021

# Combine the 2019 and 2020 frames (2021 has fewer month columns and is
# processed separately before being appended).
frames = [df2019, df2020]
df19a20 = pd.concat(frames, ignore_index=True)

# Move the trailing 'year' column so it sits right after the first column.
colnames = df19a20.columns.tolist()
colnames = [colnames[0]] + colnames[-1:] + colnames[1:-1]
df19a20 = df19a20[colnames]

# Convert blank / whitespace-only cells to NaN. replace() is not in-place, so
# the result is assigned back (previously it was discarded).
df19a20 = df19a20.replace(r'^\s*$', np.nan, regex=True)

# This cell also used to call replace(0, np.nan) and discard the result, so
# zero-sales months have always stayed 0 in everything computed from this
# frame. That dead call is removed and the zeros are kept as they were.
# NOTE(review): decide whether zero sales should really become NaN; if so,
# apply the same rule to the 2021 frame, which has no such step.


# Split the combined "Make Model" text in the Model column into separate
# 'Make' (first word) and 'Model' (the rest) columns.
# `n` is passed by keyword: pandas 2.0+ rejects it as a positional argument.
df19a20[['Make', 'Model']] = df19a20['Model'].str.split(' ', n=1, expand=True)

# 'Make' was appended as the last column; move it to the front.
colnames = df19a20.columns.tolist()
colnames = colnames[-1:] + colnames[:-1]
df19a20 = df19a20[colnames]

# Two-word makes were cut after their first word. Restore the full make name
# and strip the leftover second word from the start of the model.
for first_word, full_make in (('Alfa', 'Alfa Romeo'), ('Land', 'Land Rover')):
    is_make = df19a20['Make'] == first_word
    df19a20.loc[is_make, 'Make'] = full_make
    df19a20.loc[is_make, 'Model'] = df19a20['Model'].str.split(n=1).str[1]

# Replace the month names with month numbers, then unpivot so that every
# (make, model, year, month) combination becomes its own row.
df_sales_master = df19a20
month_numbers = {'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4', 'May': '5', 'Jun': '6', 'Jul': '7', 'Aug': '8', 'Sep': '9', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
df19a20 = df19a20.rename(columns=month_numbers)
df19a20 = df19a20.melt(id_vars=['Make', 'Model', 'year'])

# Build a 'YYYY-MM' date column from the year and month number.
year_month = df19a20['year'].astype(str) + '-' + df19a20['variable'].astype(str)
df19a20.insert(2, 'date', year_month, True)
df19a20['date'] = pd.to_datetime(df19a20['date'], format='%Y-%m').dt.strftime('%Y-%m')

# The melted value is the sales figure; year and month are now redundant.
df19a20 = df19a20.rename(columns={'value': 'sales'})
df19a20 = df19a20.drop(columns=['year', 'variable'])


# Perform the same changes on the 2021 data separately, as it has a different
# number of month columns (Jan-Sep only).

# Splitting: first word of the Model text becomes 'Make', the rest stays 'Model'.
# `n` is passed by keyword: pandas 2.0+ rejects it as a positional argument.
df2021[['Make', 'Model']] = df2021['Model'].str.split(' ', n=1, expand=True)

# 'Make' was appended as the last column; move it to the front.
colnames = df2021.columns.tolist()
colnames = colnames[-1:] + colnames[:-1]
df2021 = df2021[colnames]

# Two-word makes were cut after their first word. Restore the full make name
# and strip the leftover second word from the start of the model.
for first_word, full_make in (('Alfa', 'Alfa Romeo'), ('Land', 'Land Rover')):
    is_make = df2021['Make'] == first_word
    df2021.loc[is_make, 'Make'] = full_make
    df2021.loc[is_make, 'Model'] = df2021['Model'].str.split(n=1).str[1]

# Melting: month names -> month numbers, one row per (make, model, year, month),
# then a 'YYYY-MM' date column built from year and month number.
df2021 = df2021.rename(columns={'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4', 'May': '5', 'Jun': '6', 'Jul': '7', 'Aug': '8', 'Sep': '9'})
df2021 = pd.melt(df2021, id_vars=['Make', 'Model', 'year'])
df2021.insert(2, 'date', (df2021[['year', 'variable']].astype(str).agg('-'.join, 1)), True)
df2021['date'] = pd.to_datetime(df2021['date'], format='%Y-%m').dt.strftime('%Y-%m')
df2021 = df2021.rename(columns={'value': 'sales'})
df2021 = df2021.drop(columns=['year', 'variable'])

# Stack the 2019-2020 rows on top of the 2021 rows, order everything by date,
# and renumber the index from zero.
frames = [df19a20, df2021]
df_sales_master = (
    pd.concat(frames)
    .sort_values(by='date')
    .reset_index(drop=True)
)
df_sales_master

df_sales_master


# Average listing price for each model year, drawn on a 20x10 figure.
means = dfu['price'].groupby(dfu['year']).mean()
plt.figure(1, figsize=(20, 10))
means.plot(title='Price of Used Car Listings Over Time', ylabel='Price')

<AxesSubplot:title={'center':'Price of Used Car Listings Over Time'}, xlabel='year', ylabel='Price'>


# Work on a copy that carries a combined "Make Model" label, used as the hue
df_sales_master_temp = df_sales_master.assign(
    make_model=df_sales_master['Make'] + ' ' + df_sales_master['Model']
)

# One line per make/model, without a legend
sb.set(rc={'figure.figsize':(20,10)})
model_sale_plot = sb.lineplot(data=df_sales_master_temp, x='date', y='sales', hue='make_model', legend=False)
model_sale_plot.set_title('Vehicle Model Sales Over Time')

# Thin out the x axis: only every third tick label stays visible
for position, tick_label in enumerate(model_sale_plot.get_xticklabels()):
    tick_label.set_visible(position % 3 == 0)


# Total the sales of every make for each month.
# 'sales' is selected explicitly: summing the whole frame would also try to
# aggregate the string 'Model' column, which newer pandas versions concatenate
# instead of silently dropping.
df_sales_master_makes = (
    df_sales_master
    .groupby(['Make', 'date'], as_index=False)['sales']
    .sum()
)
df_sales_master_makes

# Sort dataframe by date
df_sales_master_makes = df_sales_master_makes.sort_values(by='date')

# Plot one line per make
sb.set(rc={'figure.figsize':(20,10)})
make_sale_plot = sb.lineplot(data=df_sales_master_makes, x='date', y='sales', hue='Make')
make_sale_plot.set_title('Vehicle Make Sales Over Time')

# Reduce x ticks: only every third label stays visible
for ind, label in enumerate(make_sale_plot.get_xticklabels()):
    label.set_visible(ind % 3 == 0)


# Monthly sales totals restricted to five major brands
top_brands = ['Ford','Toyota','Chevrolet','Honda','Nissan']
df_top5sales = df_sales_master_makes.copy()
df_top5sales = df_top5sales.loc[df_top5sales['Make'].isin(top_brands)]
df_top5sales


# Express each 'YYYY-MM' date as a fractional year: year + (month - 1) / 12,
# so January maps to .0 and December to .9166...
df_sales_master_temp2 = df_top5sales.copy()
year_part = df_sales_master_temp2['date'].str[0:4].astype(int)
month_part = df_sales_master_temp2['date'].str[5:7].astype(int)
df_sales_master_temp2['date_quant'] = year_part + (month_part - 1) / 12

df_sales_master_temp2


# Fit sales against time by ordinary least squares, using only the rows dated
# before 2021 (the 2021 rows are kept aside for prediction)
rows_before_2021 = df_sales_master_temp2[df_sales_master_temp2['date_quant'] < 2021]
sale_reg = smf.ols(formula="sales ~ date_quant", data=rows_before_2021).fit()
sale_reg.summary()


# Attach the regression residuals (aligned on the index; rows the model was
# not fitted on stay NaN) and show their distribution per month
df_sales_master_temp2['residual'] = sale_reg.resid
fitted_rows = df_sales_master_temp2[df_sales_master_temp2['date_quant'] < 2021]
sale_resid_plot = sb.violinplot(x='date', y='residual', data=fitted_rows)
sale_resid_plot.set_title("Residual of Linear Regression of Top 5 Brand Sales by Year")

# Only every third x tick label stays visible
for position, tick_label in enumerate(sale_resid_plot.get_xticklabels()):
    tick_label.set_visible(position % 3 == 0)


# Predict sales for the rows dated 2021 onward and compare with the real values
rows_from_2021 = df_sales_master_temp2[df_sales_master_temp2['date_quant'] >= 2021]
predictions = sale_reg.predict(rows_from_2021)
diff = predictions - rows_from_2021['sales']

# Scatter the prediction error against the real sales figure
plt.scatter(rows_from_2021['sales'], diff)
plt.xlabel('Real Sales')
plt.ylabel('Difference From Real Sales')
plt.show()


# Mean and standard deviation of the predictions, of the error, and of the real sales
for series in (predictions, diff, rows_from_2021['sales']):
    print(series.mean())
    print(series.std())

120374.4655072338
2743.5497614035226
-8833.090048321768
38636.47642455955
129207.55555555556
39336.760634964914


# Predict the price of a used car from make, supertype, and year with a
# multivariable least-squares linear regression.
# A reproducible random 90 percent of the rows is used for fitting; the
# remaining 10 percent is held out for prediction.

part_90 = dfu.sample(frac=0.90, random_state=1)
held_out = ~dfu.index.isin(part_90.index)
rest_part_10 = dfu[held_out]

price_formula = "price ~ year * Make * supertype"
model = smf.ols(formula=price_formula, data=part_90).fit()
model.summary()


# Fit the same price model on the listings that are not trucks.
dfuT = dfu[dfu['supertype'] != 'TRUCK']
part_90T = dfuT.sample(frac = 0.90, random_state=1)
# The holdout must come from the same truck-free frame as the training sample.
# Dropping from dfu (as before) left every truck listing in the "10 percent".
rest_part_10T = dfuT.drop(part_90T.index)

model2 = smf.ols(formula="price ~ year * Make * supertype", data=part_90T).fit()
model2.summary()


# Compare the F-statistics of the two fits (all listings vs. no trucks)
print(model.fvalue)
print(model2.fvalue)

1642.8343429735507
1404.1296173401522


# Attach the price model's residuals (aligned on the index) and show their
# distribution per model year
dfu['residual'] = model.resid
ax = sb.violinplot(x='year', y='residual', data=dfu)
ax.set_title("Residual of the Linear Model Over Year")

# Only every fifth x tick label (positions 4, 9, 14, ...) stays visible
for position, tick_label in enumerate(ax.get_xticklabels()):
    tick_label.set_visible(position % 5 == 4)


# Predict prices for the held-out rows and measure how far off each one is
predictions = model.predict(rest_part_10)
predictions
actual_price = rest_part_10['price']
diff = predictions.sub(actual_price)

# Prediction error against the actual price
plt.scatter(actual_price, diff)
plt.xlabel('Actual Price')
plt.ylabel('Difference from Estimated')

Text(0, 0.5, 'Difference from Estimated')


# Mean and standard deviation of the prediction error, then of all prices
for series in (diff, dfu['price']):
    print(series.mean())
    print(series.std())

-11.423551396919807
7571.297427781543
19088.847586472348
13045.07653305824


# Most negative prediction error, followed by the held-out listing it belongs to
print(diff.min())
worst_label = diff.idxmin()
print(rest_part_10.loc[worst_label])

-60778.30637697792
price                               62900
year                               2005.0
Make                                dodge
Model                   viper convertible
condition                        like new
fuel                                  gas
transmission                       manual
type                                coupe
state                                  ga
posting_date    2021-05-04 10:21:09-04:00
supertype                             CAR
Name: 121563, dtype: object


# Change in price compared to change in posting date:
# collect, for every make/model/year, the list of posting dates and the list
# of prices, then keep only the groups with more than one listing
gp = (
    dfu.groupby(['Make', 'Model', "year"])[['posting_date', 'price']]
    .agg(list)
    .reset_index()
)
has_several_listings = gp['price'].map(len) > 1
gp = gp[has_several_listings]
gp


# For every make/model/year group, estimate the price change per day between
# its earliest and latest posting.
gp = gp.reset_index()

slopes = []
for stamps, prices in zip(gp['posting_date'], gp['price']):
    # Work at calendar-day granularity. The positions are looked up in a list
    # of plain dates: searching the Timestamp list for a date object depends
    # on Timestamp == date comparison, which current pandas treats as unequal.
    days = [stamp.date() for stamp in stamps]
    maxd = max(stamps).date()
    mind = min(stamps).date()
    indmax = days.index(maxd)  # first listing posted on the latest day
    indmin = days.index(mind)  # first listing posted on the earliest day

    span_days = (maxd - mind).days
    if span_days == 0:
        # All listings fall on the same day; use a small non-zero span so the
        # division below is defined.
        span_days = .1
    slopes.append((prices[indmax] - prices[indmin]) / span_days)

# Assign the whole column at once: writing element by element through
# gp['slope'][i] is chained assignment and is not guaranteed to reach gp.
gp['slope'] = slopes
gp.head()


# Keep only slopes strictly between -100 and 100, then summarise them
within_bounds = (gp['slope'] < 100) & (gp['slope'] > -100)
gp = gp[within_bounds]
kept_slopes = gp['slope']
print(kept_slopes.mean())
print(kept_slopes.std())

0.0016732901066722444
35.926843850236246

	id	url	region	region_url	price	year	manufacturer	model	condition	cylinders	...	size	type	paint_color	image_url	description	county	state	lat	long	posting_date
0	7222695916	https://prescott.craigslist.org/cto/d/prescott...	prescott	https://prescott.craigslist.org	6000	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	az	NaN	NaN	NaN
1	7218891961	https://fayar.craigslist.org/ctd/d/bentonville...	fayetteville	https://fayar.craigslist.org	11900	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	ar	NaN	NaN	NaN
2	7221797935	https://keys.craigslist.org/cto/d/summerland-k...	florida keys	https://keys.craigslist.org	21000	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	fl	NaN	NaN	NaN
3	7222270760	https://worcester.craigslist.org/cto/d/west-br...	worcester / central MA	https://worcester.craigslist.org	1500	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	ma	NaN	NaN	NaN
4	7210384030	https://greensboro.craigslist.org/cto/d/trinit...	greensboro	https://greensboro.craigslist.org	4900	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	nc	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
426875	7301591192	https://wyoming.craigslist.org/ctd/d/atlanta-2...	wyoming	https://wyoming.craigslist.org	23590	2019.0	nissan	maxima s sedan 4d	good	6 cylinders	...	NaN	sedan	NaN	https://images.craigslist.org/00o0o_iiraFnHg8q...	Carvana is the safer way to buy a car During t...	NaN	wy	33.786500	-84.445400	2021-04-04T03:21:31-0600
426876	7301591187	https://wyoming.craigslist.org/ctd/d/atlanta-2...	wyoming	https://wyoming.craigslist.org	30590	2020.0	volvo	s60 t5 momentum sedan 4d	good	NaN	...	NaN	sedan	red	https://images.craigslist.org/00x0x_15sbgnxCIS...	Carvana is the safer way to buy a car During t...	NaN	wy	33.786500	-84.445400	2021-04-04T03:21:29-0600
426877	7301591147	https://wyoming.craigslist.org/ctd/d/atlanta-2...	wyoming	https://wyoming.craigslist.org	34990	2020.0	cadillac	xt4 sport suv 4d	good	NaN	...	NaN	hatchback	white	https://images.craigslist.org/00L0L_farM7bxnxR...	Carvana is the safer way to buy a car During t...	NaN	wy	33.779214	-84.411811	2021-04-04T03:21:17-0600
426878	7301591140	https://wyoming.craigslist.org/ctd/d/atlanta-2...	wyoming	https://wyoming.craigslist.org	28990	2018.0	lexus	es 350 sedan 4d	good	6 cylinders	...	NaN	sedan	silver	https://images.craigslist.org/00z0z_bKnIVGLkDT...	Carvana is the safer way to buy a car During t...	NaN	wy	33.786500	-84.445400	2021-04-04T03:21:11-0600
426879	7301591129	https://wyoming.craigslist.org/ctd/d/atlanta-2...	wyoming	https://wyoming.craigslist.org	30590	2019.0	bmw	4 series 430i gran coupe	good	NaN	...	NaN	coupe	NaN	https://images.craigslist.org/00Y0Y_lEUocjyRxa...	Carvana is the safer way to buy a car During t...	NaN	wy	33.779214	-84.411811	2021-04-04T03:21:07-0600

	Make	Model	date	sales
0	Acura	ILX	2019-01	805
1	Mercedes-Benz	AMG GT	2019-01	83
2	Mercedes-Benz	A-Class	2019-01	0
3	Mazda	MX-5 Miata	2019-01	347
4	Mazda	CX-9	2019-01	1860
...	...	...	...	...
9994	Ford	GT	2021-09	10
9995	Ford	Fusion	2021-09	42
9996	Ford	F-Series	2021-09	63164
9997	Ford	Transit	2021-09	9939
9998	Volvo	XC90	2021-09	2886

	Make	date	sales
858	Nissan	2019-01	90439
429	Honda	2019-01	96375
1035	Toyota	2019-01	138601
198	Chevrolet	2019-01	148602
330	Ford	2019-01	180766
...	...	...	...
362	Ford	2021-09	147737
230	Chevrolet	2021-09	96612
890	Nissan	2021-09	63548
461	Honda	2021-09	86303
1067	Toyota	2021-09	122495

	Make	date	sales	date_quant
858	Nissan	2019-01	90439	2019.000000
429	Honda	2019-01	96375	2019.000000
1035	Toyota	2019-01	138601	2019.000000
198	Chevrolet	2019-01	148602	2019.000000
330	Ford	2019-01	180766	2019.000000
...	...	...	...	...
362	Ford	2021-09	147737	2021.666667
230	Chevrolet	2021-09	96612	2021.666667
890	Nissan	2021-09	63548	2021.666667
461	Honda	2021-09	86303	2021.666667
1067	Toyota	2021-09	122495	2021.666667

Dep. Variable:	sales	R-squared:	0.028
Model:	OLS	Adj. R-squared:	0.020
Method:	Least Squares	F-statistic:	3.449
Date:	Mon, 20 Dec 2021	Prob (F-statistic):	0.0658
Time:	23:01:36	Log-Likelihood:	-1449.3
No. Observations:	120	AIC:	2903.
Df Residuals:	118	BIC:	2908.
Df Model:	1
Covariance Type:	nonrobust

Analysis of The United States Automotive Market

Rahul Jasmin & Krishna Gajjala

Introduction¶

Why is this important?¶

Goal¶

Data Collection¶

Used Car Data¶

New Car Data¶

Data Processing¶

Used Car Data¶

Sales Data¶

Exploratory Analysis¶

Used Car Data: Visualizing the Price of Used Cars Over Time¶

New Car Data: Visualizing Sales Over Time¶

Regression and Analysis¶

New Car Data: Predicting Future Sales¶

Used Car Data: Predict The Cost of a Used Car¶

Used Car Data: Price vs Posting Date¶

Conclusions¶

	price	year	Make	Model	condition	fuel	transmission	type	state	posting_date
30	30990	2017.0	toyota	tundra double cab sr	good	gas	other	pickup	al	2021-05-04T10:41:31-0500
31	15000	2013.0	ford	f-150 xlt	excellent	gas	automatic	truck	al	2021-05-03T14:02:03-0500
32	27990	2012.0	gmc	sierra 2500 hd extended cab	good	gas	other	pickup	al	2021-05-03T13:41:25-0500
33	34590	2016.0	chevrolet	silverado 1500 double	good	gas	other	pickup	al	2021-05-03T12:41:33-0500
34	35000	2019.0	toyota	tacoma	excellent	gas	automatic	truck	al	2021-05-03T12:12:59-0500

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	2.561e+07	1.37e+07	1.867	0.064	-1.55e+06	5.28e+07
date_quant	-1.261e+04	6789.153	-1.857	0.066	-2.61e+04	835.977

Omnibus:	10.163	Durbin-Watson:	1.797
Prob(Omnibus):	0.006	Jarque-Bera (JB):	4.233
Skew:	-0.166	Prob(JB):	0.120
Kurtosis:	2.142	Cond. No.	7.07e+06

Omnibus:	47845.201	Durbin-Watson:	2.001
Prob(Omnibus):	0.000	Jarque-Bera (JB):	128245.848
Skew:	1.048	Prob(JB):	0.00
Kurtosis:	5.839	Cond. No.	1.01e+16

Omnibus:	48043.773	Durbin-Watson:	2.013
Prob(Omnibus):	0.000	Jarque-Bera (JB):	171184.106
Skew:	1.246	Prob(JB):	0.00
Kurtosis:	6.919	Cond. No.	1.02e+16

	Make	Model	year	posting_date	price
2	acura	3.2 cl type s	2003.0	[2021-04-29 21:03:12-04:00, 2021-04-24 11:04:0...	[9999, 6000, 5950, 9999, 9999]
3	acura	3.2 tl	2000.0	[2021-05-04 09:40:31-07:00, 2021-04-23 13:14:3...	[3000, 2300]
5	acura	3.2 tl	2003.0	[2021-04-13 22:56:14-07:00, 2021-04-30 07:07:3...	[5500, 5995, 1299]
6	acura	3.2 tl	2004.0	[2021-05-03 15:22:49-06:00, 2021-04-30 10:02:1...	[2300, 4500]
7	acura	3.2 tl	2005.0	[2021-05-01 10:39:28-05:00, 2021-04-27 11:10:2...	[4500, 5000, 3200]
...	...	...	...	...	...
45495	volvo	xc90 t6 momentum sport	2017.0	[2021-05-03 14:11:31-07:00, 2021-05-03 14:51:2...	[36990, 36990, 36990, 36990, 36990]
45498	volvo	xc90 t6 r design awd gas	2016.0	[2021-05-02 12:45:55-06:00, 2021-04-25 17:53:0...	[33999, 33999, 33999, 33999, 33999]
45499	volvo	xc90 t8 hybrid inscription	2019.0	[2021-05-03 20:09:35-07:00, 2021-05-03 16:03:3...	[35000, 35000, 33000, 33000, 35000, 35000]
45501	volvo	xc90 v8	2008.0	[2021-04-27 16:57:56-07:00, 2021-04-17 17:54:4...	[8995, 5900]
45505	volvo	xc90t6 awd 24733 miles	2018.0	[2021-04-29 10:01:20-07:00, 2021-04-23 13:17:5...	[42998, 42998, 42998, 42998, 42998, 42998, 429...

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	-2.834e+06	6.6e+04	-42.927	0.000	-2.96e+06	-2.7e+06
Make[T.alfa-romeo]	-1.715e+06	8.08e+05	-2.122	0.034	-3.3e+06	-1.31e+05
Make[T.aston-martin]	1.068e+07	1.08e+06	9.923	0.000	8.57e+06	1.28e+07
Make[T.audi]	-1.708e+06	8.9e+04	-19.179	0.000	-1.88e+06	-1.53e+06
Make[T.bmw]	-6.011e+05	7.62e+04	-7.890	0.000	-7.5e+05	-4.52e+05
Make[T.buick]	8.484e+05	9.93e+04	8.546	0.000	6.54e+05	1.04e+06
Make[T.cadillac]	-8.827e+05	8.65e+04	-10.209	0.000	-1.05e+06	-7.13e+05
Make[T.chevrolet]	1.381e+06	7.38e+04	18.705	0.000	1.24e+06	1.53e+06
Make[T.chrysler]	4.573e+05	9.29e+04	4.924	0.000	2.75e+05	6.39e+05
Make[T.dodge]	-1.244e+06	9.19e+04	-13.542	0.000	-1.42e+06	-1.06e+06
Make[T.ferrari]	3.316e+06	4.72e+06	0.703	0.482	-5.93e+06	1.26e+07
Make[T.fiat]	-1.428e+06	3.38e+05	-4.222	0.000	-2.09e+06	-7.65e+05
Make[T.ford]	4.061e+05	7.38e+04	5.505	0.000	2.61e+05	5.51e+05
Make[T.gmc]	3.424e+05	7.83e+05	0.437	0.662	-1.19e+06	1.88e+06
Make[T.harley-davidson]	2.152e+06	8.38e+05	2.568	0.010	5.1e+05	3.8e+06
Make[T.honda]	5.069e+05	7.35e+04	6.896	0.000	3.63e+05	6.51e+05
Make[T.hyundai]	3.972e+05	8.79e+04	4.518	0.000	2.25e+05	5.69e+05
Make[T.infiniti]	-8.486e+05	9.46e+04	-8.966	0.000	-1.03e+06	-6.63e+05
Make[T.jaguar]	-4.646e+04	1.12e+05	-0.416	0.677	-2.65e+05	1.72e+05
Make[T.jeep]	-3.376e+05	1.93e+05	-1.751	0.080	-7.15e+05	4.02e+04
Make[T.kia]	-1.978e+05	1.12e+05	-1.770	0.077	-4.17e+05	2.12e+04
Make[T.land rover]	-5.217e+05	6.45e+05	-0.809	0.418	-1.79e+06	7.42e+05
Make[T.lexus]	-5.879e+05	8.29e+04	-7.092	0.000	-7.5e+05	-4.25e+05
Make[T.lincoln]	-4e+05	9.37e+04	-4.270	0.000	-5.84e+05	-2.16e+05
Make[T.mazda]	4.53e+05	9.96e+04	4.549	0.000	2.58e+05	6.48e+05
Make[T.mercedes-benz]	-7.064e+05	8.01e+04	-8.817	0.000	-8.63e+05	-5.49e+05
Make[T.mercury]	2.463e+06	2.38e+05	10.355	0.000	2e+06	2.93e+06
Make[T.mini]	-5.55e+05	1.19e+05	-4.666	0.000	-7.88e+05	-3.22e+05
Make[T.mitsubishi]	1.136e+06	1.09e+05	10.415	0.000	9.22e+05	1.35e+06
Make[T.nissan]	4.034e+05	7.99e+04	5.048	0.000	2.47e+05	5.6e+05
Make[T.pontiac]	3.016e+06	1.89e+05	15.941	0.000	2.65e+06	3.39e+06
Make[T.porsche]	-4.683e+05	1.62e+05	-2.897	0.004	-7.85e+05	-1.51e+05
Make[T.ram]	5.43e+05	1.03e+06	0.528	0.598	-1.47e+06	2.56e+06
Make[T.rover]	9.092e+06	2.65e+06	3.429	0.001	3.9e+06	1.43e+07
Make[T.saturn]	1.762e+06	3.21e+05	5.491	0.000	1.13e+06	2.39e+06
Make[T.subaru]	-3.559e+05	9.3e+04	-3.829	0.000	-5.38e+05	-1.74e+05
Make[T.tesla]	9.998e+05	2.32e+05	4.303	0.000	5.44e+05	1.46e+06
Make[T.toyota]	7.258e+05	7.33e+04	9.905	0.000	5.82e+05	8.69e+05
Make[T.volkswagen]	2.754e+05	8.39e+04	3.282	0.001	1.11e+05	4.4e+05

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	-2.824e+06	6.13e+04	-46.041	0.000	-2.94e+06	-2.7e+06
Make[T.alfa-romeo]	-1.573e+06	7.56e+05	-2.079	0.038	-3.05e+06	-9.04e+04
Make[T.aston-martin]	7.414e+06	7.15e+05	10.376	0.000	6.01e+06	8.81e+06
Make[T.audi]	-1.681e+06	8.27e+04	-20.324	0.000	-1.84e+06	-1.52e+06
Make[T.bmw]	-6.243e+05	7.1e+04	-8.791	0.000	-7.63e+05	-4.85e+05
Make[T.buick]	8.319e+05	9.24e+04	9.000	0.000	6.51e+05	1.01e+06
Make[T.cadillac]	-8.948e+05	8.04e+04	-11.123	0.000	-1.05e+06	-7.37e+05
Make[T.chevrolet]	1.389e+06	6.87e+04	20.210	0.000	1.25e+06	1.52e+06
Make[T.chrysler]	4.368e+05	8.64e+04	5.057	0.000	2.67e+05	6.06e+05
Make[T.dodge]	-1.305e+06	8.61e+04	-15.163	0.000	-1.47e+06	-1.14e+06
Make[T.ferrari]	3.306e+06	4.42e+06	0.749	0.454	-5.35e+06	1.2e+07
Make[T.fiat]	-1.493e+06	3.24e+05	-4.604	0.000	-2.13e+06	-8.57e+05
Make[T.ford]	3.627e+05	6.87e+04	5.281	0.000	2.28e+05	4.97e+05
Make[T.gmc]	1.873e+05	7.36e+05	0.254	0.799	-1.26e+06	1.63e+06
Make[T.harley-davidson]	2.336e+06	3.01e+05	7.759	0.000	1.75e+06	2.93e+06
Make[T.honda]	4.981e+05	6.84e+04	7.286	0.000	3.64e+05	6.32e+05
Make[T.hyundai]	3.704e+05	8.19e+04	4.524	0.000	2.1e+05	5.31e+05
Make[T.infiniti]	-8.663e+05	8.86e+04	-9.779	0.000	-1.04e+06	-6.93e+05
Make[T.jaguar]	-4.254e+04	1.06e+05	-0.403	0.687	-2.5e+05	1.65e+05
Make[T.jeep]	-4.016e+05	1.78e+05	-2.258	0.024	-7.5e+05	-5.3e+04
Make[T.kia]	-1.73e+05	1.03e+05	-1.675	0.094	-3.75e+05	2.94e+04
Make[T.land rover]	-5.29e+05	6.27e+05	-0.843	0.399	-1.76e+06	7.01e+05
Make[T.lexus]	-5.77e+05	7.7e+04	-7.495	0.000	-7.28e+05	-4.26e+05
Make[T.lincoln]	-3.492e+05	8.67e+04	-4.029	0.000	-5.19e+05	-1.79e+05
Make[T.mazda]	4.422e+05	9.29e+04	4.759	0.000	2.6e+05	6.24e+05
Make[T.mercedes-benz]	-7.281e+05	7.46e+04	-9.757	0.000	-8.74e+05	-5.82e+05
Make[T.mercury]	2.417e+06	2.18e+05	11.065	0.000	1.99e+06	2.85e+06
Make[T.mini]	-5.479e+05	1.12e+05	-4.912	0.000	-7.67e+05	-3.29e+05
Make[T.mitsubishi]	1.132e+06	1.03e+05	10.987	0.000	9.3e+05	1.33e+06
Make[T.nissan]	3.821e+05	7.45e+04	5.129	0.000	2.36e+05	5.28e+05
Make[T.pontiac]	3.166e+06	1.78e+05	17.827	0.000	2.82e+06	3.51e+06
Make[T.porsche]	-4.81e+05	1.52e+05	-3.171	0.002	-7.78e+05	-1.84e+05
Make[T.ram]	3.19e+05	9.15e+05	0.349	0.727	-1.47e+06	2.11e+06
Make[T.rover]	6.066e+06	1.9e+06	3.192	0.001	2.34e+06	9.79e+06
Make[T.saturn]	1.74e+06	2.92e+05	5.968	0.000	1.17e+06	2.31e+06
Make[T.subaru]	-3.536e+05	8.72e+04	-4.057	0.000	-5.24e+05	-1.83e+05
Make[T.tesla]	9.555e+05	2.14e+05	4.468	0.000	5.36e+05	1.37e+06
Make[T.toyota]	6.991e+05	6.82e+04	10.251	0.000	5.65e+05	8.33e+05
Make[T.volkswagen]	2.483e+05	7.83e+04	3.172	0.002	9.49e+04	4.02e+05