The BLP Method of Demand Curve Estimation¶
Author:
Conghan Zheng
Last updated:
17 MAR 2023
Compiled with:
Python 3.9.6
In this example, we perform structural demand estimation using the algorithm described in Berry, Levinsohn & Pakes (1995).
Model¶
Suppose there are M markets indexed by $m = 1,\dots, M$, and let $J_m$ be the number of options available to each consumer in market $m$. Following the exposition by Nevo (2000), the (indirect) choice-specific utility that consumer $i$ in market $m$ obtains from product $j$ (with $j=0$ representing the "no-purchase" option, that is, the outside good) is given by:
\begin{align*} \tilde{u}_{ijm} = \underbrace{x_{jm} \beta_i + \alpha_i (y_i - p_{jm})}_{\substack{\text{deterministic component}}} + \underbrace{\xi_{jm} + \varepsilon_{ijm}}_{\text{random component}} \end{align*}
where:
- $y_i$ is the income of consumer $i$.
- $p_{jm}$ is the price of product $j$.
- $x_{jm}$ is a $k$-dimensional vector of observed non-price attributes of product $j$ in market $m$.
- $\alpha_i$ is consumer $i$'s marginal utility from income.
- $\beta_i$ is the vector of tastes of consumer $i$, with $k$ dimensions.
- $\xi_{jm}$ is the unobserved (by the econometrician) attributes of product $j$ in market $m$.
- $\varepsilon_{ijm}$ is a mean-zero stochastic term.
Note that the utility from income, $\alpha_i y_i$, is constant across choices and therefore plays no part in the consumer's decision, so we can simply drop it (or, equivalently, transform price $p_{jm}$ to $\frac{p_{jm}}{y_i}$). From now on we consider the following monotonic transformation of the original utility specification: \begin{align} u_{ijm} = \underbrace{x_{jm} \beta_i - \alpha_i p_{jm} }_{ \equiv V_{ijm}} + \xi_{jm} + \varepsilon_{ijm} \label{eq:main} \tag{1} \end{align}
Empirical Strategy¶
The Endogeneity of Price¶
The key issue that motivates the BLP estimation approach is the endogeneity of price. The price of each product depends on all its attributes. Some attributes are observed by the producer but not measured by the researcher and are included in $\xi$, yet they affect the demand and/or the costs for the product. Therefore, price $p_{jm}$ depends on $\xi_{jm}$.
The BLP approach to this problem is to move $\xi_{jm}$ to the observed portion of utility. We separate the deterministic component $V_{ijm}$ into two parts: $\bar{V}_{jm}$, the portion that varies over products and markets but not consumers, and $\tilde{V}_{ijm}$, the portion that varies over consumers as well as markets and products. Thus, we can write: \begin{align} \eqref{eq:main}: \quad u_{ijm} &= V_{ijm} + \xi_{jm} + \varepsilon_{ijm} \nonumber \\ &\equiv \bar{V}_{jm} + \tilde{V}_{ijm} + \xi_{jm} + \varepsilon_{ijm} \nonumber \\ &= \underbrace{\bar{V}_{jm} + \xi_{jm}}_{\equiv \delta_{jm}} +\tilde{V}_{ijm} + \varepsilon_{ijm} \nonumber \\ &\equiv \delta_{jm} + \tilde{V}_{ijm} + \varepsilon_{ijm} \label{eq:step1} \tag{2} \end{align} where $\delta_{jm}$ is constant for each product in each market. A choice model based on this utility specification does not entail any endogeneity: the market-product constant $\delta_{jm}$ absorbs the unobserved attributes $\xi_{jm}$.
One difficulty in decomposing $V_{ijm}$ into $\bar{V}_{jm}$ and $\tilde{V}_{ijm}$ is that every term in the indirect utility \eqref{eq:main} except $\xi_{jm}$ carries a subscript $i$ (think of $\alpha_i p_{jm}$, $x_{jm} \beta_i$, and $\varepsilon_{ijm}$). To describe how consumer preferences vary with individual characteristics, we model the distribution of the consumer taste parameters $\alpha_i$ and $\beta_i$: \begin{align} (\alpha_i,\beta_i)' &= (\alpha,\beta)' + \Pi \cdot \eta_i + \Sigma \cdot \nu_i , \quad \theta \equiv (\Pi, \Sigma) \label{eq:rpl} \tag{3} \end{align} where $\alpha$ and $\beta$ are the mean levels of the two parameters and $\Pi \eta_i + \Sigma \nu_i$ is the individual-specific deviation ($\Sigma$ is usually restricted to be diagonal). Following Nevo (2000), given that no individual-level data are observed, the individual characteristics consist of two components: the demographics $\eta_i$, which are referred to as observed, and additional characteristics $\nu_i$, which are referred to as unobserved.
Even though we do not observe individual data, the demographic variables $\eta_i$ are still considered "observed". We may not know each agent's income, but we may know some features of the income distribution, and we might even have a sample from the joint distribution of several demographic variables (e.g., income, age, family size, race, and education from Census data).
The additional characteristics $\nu_i$ might include things like whether the individual owns a dog, which could be very important in the decision of which car to buy (suppose we are considering the automobile markets as in the BLP paper), but are unlikely to be collected by any data project.
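Specification \eqref{eq:rpl} is easy to simulate: draw $\eta_i$ and $\nu_i$ and map them into individual coefficients. Below is a minimal sketch with purely hypothetical parameter values (one demographic variable, $k = 2$ product attributes); none of these numbers come from the model estimated later.

```python
import numpy as np

rng = np.random.default_rng(0)

theta_mean = np.array([-1.0, 0.5, 0.3])   # mean (alpha, beta_1, beta_2), hypothetical
Pi = np.array([[0.2], [0.0], [0.1]])      # loadings on one demographic, hypothetical
Sigma = np.diag([0.5, 0.3, 0.2])          # diagonal scaling of nu_i, hypothetical

N = 5000                                  # number of simulated consumers
eta = rng.standard_normal((1, N))         # "observed" demographic draws
nu = rng.standard_normal((3, N))          # unobserved taste draws

# Equation (3): (alpha_i, beta_i)' = (alpha, beta)' + Pi eta_i + Sigma nu_i
theta_i = theta_mean[:, None] + Pi @ eta + Sigma @ nu
print(theta_i.shape)          # one (k+1)-vector of coefficients per consumer
print(theta_i.mean(axis=1))   # sample mean is close to theta_mean
```

With enough draws the simulated coefficients average back to $(\alpha, \beta)$, which is exactly the decomposition into mean (linear) and deviation (nonlinear) parts used below.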
Using \eqref{eq:step1} and \eqref{eq:rpl}, we can rewrite the original model \eqref{eq:main} as \begin{align} u_{ijm} &= \delta_{jm} + \tilde{V}_{ijm}(\theta) + \varepsilon_{ijm} \nonumber \\ &= \delta_{jm} + (x_{jm},p_{jm}) \cdot (\Pi \eta_i + \Sigma \nu_i) + \varepsilon_{ijm} \label{eq:step1_ex} \tag{4}\\ \delta_{jm} &= \bar{V}_{jm}(\alpha,\beta) + \xi_{jm} \nonumber \\ &= x_{jm} \beta - \alpha p_{jm} + \xi_{jm} \label{eq:delta} \tag{5} \end{align} $(\alpha,\beta)$ and $\theta$ together are all the parameters of the model; vector $(\alpha, \beta)$ is usually called the linear parameters, and vector $\theta$ the nonlinear parameters (the reason for the names will become clear below).
Equations \eqref{eq:step1_ex} and \eqref{eq:delta} tell us that the linear parameters $(\alpha,\beta)$ and the nonlinear parameters $\theta$ are all we need to estimate in this model.
The Outer Loop¶
We proceed with the estimation of the choice model under utility specification \eqref{eq:step1_ex}. Recall from last semester's Microeconometrics the additive random utility interpretation of the MNL and CL models ($U_{j} = x_j \beta + \varepsilon_j$). We obtain a similar form here, except that the parameters contain the random components $(\eta_i, \nu_i)$. A multinomial logit model in which the coefficients in the deterministic component are allowed to be random is called a Random Parameters Logit (RPL, also known as Mixed Logit). The RPL model is well suited to modeling aggregation over heterogeneous consumers (in our context, "random coefficients" really means individual-specific coefficients).
Given that the unobservables $\{\varepsilon_{ijm}\}$ are independent and type I extreme value distributed, the individual choice probability is \begin{align} S_{ijm} &= \mathbf{P}(u_{ijm} \geq u_{ikm}, \forall k = 0,\dots,J_m) \nonumber \\ &= \mathbf{P}\bigg(\varepsilon_{ikm} - \varepsilon_{ijm} \leq \left(\delta_{jm} +\tilde{V}_{ijm}(\theta)\right) - \left( \delta_{km} + \tilde{V}_{ikm}(\theta) \right), \forall k = 0,\dots,J_m \bigg) \nonumber \\ &= \int_{\varepsilon_{ijm}: \, u_{ijm} \geq u_{ikm}, \forall k} \mathrm{d} F(\varepsilon_{ijm}) \nonumber \\ &= \frac{e^{\delta_{jm} + \tilde{V}_{ijm}(\theta)}}{\sum_l e^{\delta_{lm} + \tilde{V}_{ilm}(\theta)}} \label{emax} \tag{6} \end{align} The logit formula \eqref{emax} integrates out $\varepsilon_{ijm}$ analytically; what remains is to integrate out the randomness (in our context, "individualness") in $\eta_i$ and $\nu_i$ to obtain the market share of product $j$ in market $m$: \begin{align} S_{jm} &\overset{\eqref{eq:rpl}}{=} \int_{\nu_i} \int_{\eta_i} S_{ijm} \mathrm{d} F(\eta_i, \nu_i \vert \theta) \nonumber \\ &= \int_{\nu_i} \int_{\eta_i} \frac{e^{\delta_{jm} + \tilde{V}_{ijm}(\theta)}}{\sum_l e^{\delta_{lm} + \tilde{V}_{ilm}(\theta)}} \mathrm{d} F(\eta_i, \nu_i \vert \theta) \label{eq:ints} \tag{7} \end{align}
Notice that the integral in \eqref{eq:ints} is multidimensional and has no closed form. BLP assume that the distribution $F(\cdot)$ is the product of $k+1$ independent normals (1 from $\alpha_i$, $k$ from $\beta_{i}$). The integral is typically evaluated by Monte Carlo simulation with $N_m$ (the number of sampled consumers in market $m$) draws of $(\eta_i, \nu_i)$ from $F(\eta_i, \nu_i \vert \theta)$: \begin{align*} \hat{S}_{jm} &= \frac{1}{N_m} \sum_{i=1}^{N_m} \frac{e^{\delta_{jm} + \tilde{V}_{ijm}(\theta)}}{\sum_l e^{\delta_{lm} + \tilde{V}_{ilm}(\theta)}} \label{eq:markov} \tag{8} \end{align*} In principle, other numerical integration methods could also be used.
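The share simulator \eqref{eq:markov} can be sketched for a single market as follows. The mean utilities `delta` and the consumer-specific components `V_tilde` are hypothetical placeholders, not estimates:

```python
import numpy as np

rng = np.random.default_rng(0)
J, N = 3, 500                                # products in the market, draws

delta = np.array([0.5, 0.1, -0.2])           # mean utilities (hypothetical)
V_tilde = 0.3 * rng.standard_normal((N, J))  # individual utility components

# Equation (8): average the individual logit shares over the N draws;
# the outside good has utility normalized to 0, hence the 1 in the denominator.
u = delta[None, :] + V_tilde                 # (N, J) deterministic utilities
expu = np.exp(u)
s_i = expu / (1.0 + expu.sum(axis=1, keepdims=True))
s_hat = s_i.mean(axis=0)
print(s_hat)                                 # inside shares, summing to < 1
```

Note that the inside shares sum to less than one: the remainder is the outside good's share.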
The Inner Loop¶
Once the choice model is estimated, the estimated constants $\delta$ can be used as the dependent variable in the linear regression \eqref{eq:delta} to estimate $(\alpha,\beta)$. Since price is endogenous in that regression, it should be estimated by IV. BLP propose two sets of instruments: the average non-price attributes of other products by the same manufacturer, and the average non-price attributes of other firms' products.
But before that, we need to first address the issue that there might be a very large number of constants $\delta_{jm}$ to estimate: $J \times M \times T$ (number of products times number of markets times number of years).
BLP provide an algorithm for estimating them quickly, within the iterative process for the other parameters. We already know from \eqref{eq:markov} that $\hat{S}_{jm}$ is a function of $\delta_{jm}$ and $\theta$. For a correctly specified model and a given value of $\theta$, the constants $\delta_{jm}$ determine the predicted market shares $\hat{S}_{jm}$ for each product, and therefore can be set such that the predicted shares equal the actual shares, $\hat{S}_{jm}(\delta_{jm}) = S_{jm}$, at each trial value of $\theta$.
Instead of estimating the constants $\delta_{jm}$ by usual gradient-based methods, the iterative procedure recalibrates constants so that $\hat{S}_{jm}(\delta_{jm}) = S_{jm}$. The constants are adjusted iteratively by \begin{align} \delta_{jm}^{t+1} = \delta_{jm}^{t} + \ln \left( \frac{S_{jm}}{\hat{S}_{jm}(\delta^{t}_{jm})} \right) \label{eq:contraction} \tag{9} \end{align} Please think about how the process \eqref{eq:contraction} moves each constant in the "right" direction.
BLP shows that the iterative adjustment process is a contraction mapping, such that it is guaranteed to converge to a unique set of constants $\delta$. For any given value of $\theta$, the calibrated constants $\delta$s are uniquely determined (by the iterative adjustment process), that is, $\hat{\delta} = \hat{\delta}(\theta)$. This implies that the choice probability $S_{ijm}$ is now a function of $\theta$ alone: \begin{align*} \hat{S}_{ijm}(\theta) &= \hat{S}_{ijm}(\delta(\theta), \theta) \end{align*} The optimization of the whole problem is only over $\theta$.
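The contraction \eqref{eq:contraction} can be sketched end-to-end for one market: generate "observed" shares from a known $\delta$, then recover it by iterating \eqref{eq:contraction}. All numerical values below are hypothetical:

```python
import numpy as np

def simulated_shares(delta, V_tilde):
    """Monte Carlo logit shares, equation (8), for a single market."""
    u = delta[None, :] + V_tilde
    expu = np.exp(u)
    return (expu / (1.0 + expu.sum(axis=1, keepdims=True))).mean(axis=0)

def contraction(s_obs, V_tilde, tol=1e-13, max_iter=1000):
    """Iterate equation (9) until successive deltas are sufficiently close."""
    delta = np.log(s_obs) - np.log(1.0 - s_obs.sum())  # plain-logit start
    for _ in range(max_iter):
        delta_new = delta + np.log(s_obs / simulated_shares(delta, V_tilde))
        if np.max(np.abs(delta_new - delta)) < tol:
            break
        delta = delta_new
    return delta_new

rng = np.random.default_rng(0)
V_tilde = 0.3 * rng.standard_normal((500, 3))   # hypothetical heterogeneity
delta_true = np.array([0.5, 0.1, -0.2])         # hypothetical true constants
s_obs = simulated_shares(delta_true, V_tilde)   # "observed" market shares

delta_hat = contraction(s_obs, V_tilde)
print(np.max(np.abs(delta_hat - delta_true)))   # essentially zero
```

Because the same draws generate the "observed" shares and the simulator, the contraction recovers $\delta$ exactly, which illustrates the uniqueness result: whenever $\hat{S}_{jm}$ is too low, $\ln(S_{jm}/\hat{S}_{jm}) > 0$ pushes the constant up, and vice versa.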
The Nested Fixed Point Algorithm (NFP)¶
For any given $\theta$, the inner loop of the NFP iterates \eqref{eq:contraction} on the share equations $\hat{S}_{jm} = S_{jm}$ until the successive iterates $\delta_{jm}^{t+1}$ and $\delta_{jm}^{t}$ are sufficiently close: \begin{align*} \quad & \hat{\delta}_{jm} (\theta) = \lim_{t \to \infty} \delta_{jm}^{t}, \quad \text{stopping once} \enspace \lVert \delta_{jm}^{t+1} - \delta_{jm}^{t} \rVert < \epsilon \end{align*}
In each iteration of the outer loop, \eqref{eq:delta} is solved by GMM. For the given $\theta$, we solve \begin{align} \left(\hat{\alpha}(\theta), \hat{\beta}(\theta) \right) = \mathop{\mathrm{argmin}}\limits_{(\alpha, \beta)} \enspace g' W^{-1} g \label{eq:gmm} \tag{10} \end{align} and we search over $\theta$ to minimize the GMM objective function.
The observation-specific moments $g_{jm}$ in \eqref{eq:gmm} are \begin{align} g_{jm} = \xi_{jm} \cdot z_{jm} \overset{\eqref{eq:delta}}{=} \big[\delta_{jm}(\theta) - x_{jm} \beta + \alpha p_{jm}\big] \cdot z_{jm} \label{eq:mmt} \tag{11} \end{align} The efficient choice of the weighting matrix $W$ is the asymptotic covariance of the moments, estimated in the sample by $\hat{W} = \frac{1}{n}\sum_{j,m} g_{jm} g_{jm}'$.
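Given $\delta(\theta)$, the linear step \eqref{eq:gmm}–\eqref{eq:mmt} has a closed form. Here is a self-contained sketch on synthetic data (all variable names and numbers are illustrative; the matrix `A` plays the role of $W^{-1}$, using the standard first-step choice $(Z'Z)^{-1}$):

```python
import numpy as np

rng = np.random.default_rng(0)
n, k, l = 200, 2, 4                        # observations, regressors, instruments

Z = rng.standard_normal((n, l))            # instruments z_jm
# Regressors correlated with the instruments (so IV has power):
X = Z @ rng.standard_normal((l, k)) + 0.1 * rng.standard_normal((n, k))
beta_true = np.array([-2.0, 1.0])
delta = X @ beta_true + 0.05 * rng.standard_normal(n)  # "mean utilities"

# argmin_b (delta - X b)' Z A Z' (delta - X b), the linear-GMM closed form:
# b = (X'Z A Z'X)^{-1} X'Z A Z' delta
A = np.linalg.inv(Z.T @ Z)
b = np.linalg.solve(X.T @ Z @ A @ Z.T @ X, X.T @ Z @ A @ Z.T @ delta)
print(b)                                   # close to beta_true
```

This closed form is why $(\alpha, \beta)$ are called the linear parameters: for each trial $\theta$ they drop out in one matrix computation, and only $\theta$ requires numerical search.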
Estimation¶
Resources¶
- Python: the pyblp package (the best option for now)
- R: the BLPestimatoR package
- Stata: the user-written blp command
Steps¶
Choose the number of draws $N_m$ and arbitrary starting values $\theta^{0}$ and $\delta^0$. Draw random values for $(\eta_i, \nu_i)$, $i = 1,\dots, N_m$, i.i.d. from the multivariate normal (of dimension $k+1$) $F(\eta_i, \nu_i \vert \theta^{0})$. Approximate the integral $\hat{S}_{jm}$ using \eqref{eq:markov}.
Using the starting values $\theta^{0}$ and $\delta^0$ and the observed market shares $S$, solve for $\hat{\delta}$ by the iterative process \eqref{eq:contraction}.
Select arbitrary starting values $(\alpha^0, \beta^0)$, calculate the moment conditions $g(\alpha^0, \beta^0, \hat{\delta})$ using \eqref{eq:mmt} and the starting weighting matrix $W^0 = g(\alpha^0, \beta^0, \hat{\delta})\, g(\alpha^0, \beta^0, \hat{\delta})'$. Solve for the GMM estimates of $\alpha$ and $\beta$. Update the weighting matrix using the estimates and iterate until the GMM estimates converge. Take the converged weighting matrix and estimates as $\hat{W}$ and $(\hat{\alpha}, \hat{\beta})$.
Take the new values $(\hat{\delta}, \hat{\alpha}, \hat{\beta}, \hat{W})$, find a new value for $\theta$, repeat steps (1) to (3), until the moment expression $g' W^{-1} g$ from \eqref{eq:gmm} is close enough to 0.
The estimates are consistent even if you don't iterate on the weighting matrix in step 3, but optimal two-step GMM or efficient one-step continuously updated GMM improves statistical efficiency (Nevo, 2000; Dubé et al., 2012).
Example: Replicate Nevo (2000)¶
We try to replicate Table 1 of Nevo (2000). For illustration purposes, I don't use the pyblp package (except to load its bundled data).
"""
Preliminaries
"""
import pandas as pd
import numpy as np
from scipy.optimize import minimize
#!pip install pyblp
import pyblp
from numba import jit, njit, prange
import time
time_start = time.time()
import warnings
warnings.filterwarnings("ignore", category=Warning) ## Suppress all warnings
"""
Read in data
"""
## Product data
product = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)
product["cons"] = 1
## Consumer data
consumer = pd.read_csv(pyblp.data.NEVO_AGENTS_LOCATION)
demographic = consumer[["income", "income_squared", "age", "child"]].values
"""
Some data cleansing
"""
## Number of goods per market
J = product.groupby("market_ids").sum(numeric_only=True).cons.values
## Number of markets
M = len(J)
## Number of simulations per market (According to Nevo, 2000, page 538)
N = 20
"""
We specify the regressors in $\bar{V}$ (linear) and $\tilde{V}$ (non-linear).
The two sets of regressors are not necessarily the same, because we may not want
random coefficients (the $\beta_i$s) over all the regressors.
"""
## The (linear) regressors in V_bar
X1 = np.hstack((product[["prices"]], pd.get_dummies(product["product_ids"])))
## The (non-linear) regressors in V_tilde, over which we want random
# coefficients (so no fixed effects)
X2 = product[["cons", "prices", "sugar", "mushy"]].values
## Number of nonlinear parameters (k=3 because we have "prices", "sugar", "mushy")
k = X2.shape[1] - 1
## Instruments
iv_cols = [col for col in product.columns if 'demand' in col]
Z = np.hstack((product[iv_cols], pd.get_dummies(product["product_ids"])))
"""
Monte Carlo simulation for \nu_i
- k+1: k from x, 1 from p
- M: different draws for each market
- N: number of draws
"""
np.random.seed(0)
v = np.reshape(np.random.standard_normal((k + 1) * N * M), (M * N, k + 1))
"""
Class delta
"""
## Share of the outside good (base category) (See Nevo, 2000, page 520)
product["outside"] = 1 - product["shares"].groupby(product["market_ids"]).transform("sum")
## Initial delta: log(share) - log(share of outside good) (normalization)
delta_0 = np.log(product["shares"]) - np.log(product["outside"])
## Mutable container for delta: updates made inside the objective function
# persist across calls, so each outer-loop iteration starts the inner loop
# from the previously solved delta
class delta:
    def __init__(self, delta):
        self.delta = np.array(delta)

## Initialize a delta object using delta_0
d = delta(delta_0)
"""
Define functions
"""
"""
Common inputs:
- x1: non-price product attributes (linear)
- x2: non-price product attributes (nonlinear)
- p: price
- v: monte carlo draws for \nu
- demographic: consumer demographics
- delta: initial guess for the mean utility
- sigma: Sigma for nonlinear parameters
- pi: Pi for nonlinear parameters
- J: vector of number of goods per market
- M: number of markets
- N: number of draws
"""
"""
Function: fill in (deterministic) utility elements
"""
@njit(parallel = True) ## run in parallel
def emax_iter(out, x2, v, demographic, delta, sigma, pi, J, M, N):
for i in prange(N): ## Iterate over the individuals
mj = 0
for m in prange(M): # Iterate over the markets
mktSize = J[m] # Market size of market m
for j in prange(mktSize): # Iterate over all goods in market m
## u(ijm), equation (4)
out[mj, i] = delta[mj] + \
x2[mj, 0] * (v[N * m + i, 0] * sigma[0] +
np.dot(pi[0,:], demographic[N * m + i,:])) + \
x2[mj, 1] * (v[N * m + i, 1] * sigma[1] +
np.dot(pi[1,:], demographic[N * m + i,:])) + \
x2[mj, 2] * (v[N * m + i, 2] * sigma[2] +
np.dot(pi[2,:], demographic[N * m + i,:])) + \
x2[mj, 3] * (v[N * m + i, 3] * sigma[3] +
np.dot(pi[3,:], demographic[N * m + i,:]))
mj += 1
return out
"""
Function: return indirect utility matrix
"""
@jit(forceobj=True)
def indirect_utility(x2, v, demographic, delta, sigma, pi, J, M, N):
    sigma = np.abs(sigma) ## positive Sigma
out = np.zeros((sum(J), N)) ## initialization
    ## Fill the matrix of deterministic utilities
    out = emax_iter(out, x2, v, demographic, delta, sigma, pi, J, M, N)
return out
"""
Function: compute estimated market shares
outputs:
- q: quantities consumed
- s: market shares
"""
@jit(forceobj=True)
def mkt_share(x2, v, demographic, delta, sigma, pi, J, M, N):
## Quantities bought (initialization)
q = np.zeros((np.sum(J), N))
## Indirect utilities
u = indirect_utility(x2, v, demographic, delta, sigma, pi, J, M, N)
## Exponentiate the utilities
exp_u = np.exp(u)
## Outside good
first_good = 0
for m in range(M):
mktSize = J[m] ## market size of market m
numer = exp_u[first_good:first_good + mktSize,:] ## numerator for shares
        denom = 1 + numer.sum(axis = 0) ## denominator for shares (outside good utility normalized to 0)
q[first_good:first_good + mktSize,:] = numer/denom ## quantity(ijm)
first_good += mktSize
    ## Shares: equal weight on every simulated consumer
    s = np.matmul(q, np.repeat(1/N, N))
return [q,s]
"""
Function: Solve delta using the iterative process
"""
@jit(forceobj=True)
def solve_delta(s, x2, v, demographic, delta, sigma, pi, J, M, N, tol):
diff = 1
delta_old = delta
while diff > tol:
## The estimated shares
q_s = mkt_share(x2, v, demographic, delta_old, sigma, pi, J, M, N)
        s_hat = q_s[1]
        ## Contraction mapping, equation (9)
        delta_new = delta_old + np.log(s/s_hat)
## Update the difference between iterates
diff = np.max(np.abs(delta_new - delta_old))
delta_old = delta_new.copy()
return delta_old
"""
Function: calculate value of the GMM objective function
"""
def objective(params, s, x1, x2, v, demographic, J, M, N, tol, Z, W):
## Initialization: theta_0
sigma = params[0:4]
pi = params[4:].reshape((4,4))
pi[[0,2,3],1] = 0
pi[[0,2,3],3] = 0
pi[1,2] = 0
## Number of obs. JxM
obs = np.sum(J)
if np.min(sigma) < 0:
return 1e20
else:
d.delta = solve_delta(s, x2, v, demographic, d.delta, sigma, pi, J, M, N, tol) ## inner loop
delta_vector = d.delta.reshape((-1,1))
## Linear parameters (alpha, beta) given theta_0, FOC
b = np.linalg.inv(x1.T @ Z @ W @ Z.T @ x1) @ (x1.T @ Z @ W @ Z.T @ delta_vector)
## Estimate for error term \xi
xi = delta_vector - x1 @ b
g = Z.T @ xi / np.size(xi, axis = 0)
obj = float(obs ** 2 * g.T @ W @ g)
return obj
"""
Starting values
- We choose the same starting values for theta and W as Nevo (2000)
"""
## Initial values for theta
sigma = [ .377, 1.848, 0.004, 0.081 ]
pi1 = [ 3.09, 0, 1.186, 0 ]
pi2 = [ 16.6, -.66, 0, 11.6 ]
pi3 = [ -0.193, 0, 0.03, 0 ]
pi4 = [ 1.468, 0, -1.5, 0 ]
params = np.hstack((sigma, pi1, pi2, pi3, pi4))
## Initial weighting matrix
W0 = np.linalg.inv(Z.T @ Z)
"""
Get the optimal weight matrix
"""
## Using initial weight matrix W0
params_init_wt = minimize(objective,
params,
args = (product.shares.values, X1, X2, v, demographic, J, M, N, 1e-4, Z, W0),
method = "Nelder-Mead")
params_2 = params_init_wt.x
sigma_2 = params_2[0:4]
pi_2 = params_2[4:].reshape((4,4))
d.delta = solve_delta(product.shares.values, X2, v, demographic, d.delta, sigma_2, pi_2, J, M, N, 1e-4) ## inner loop
delta_vector = d.delta.reshape((-1,1))
b = np.linalg.inv(X1.T @ Z @ W0 @ Z.T @ X1) @ (X1.T @ Z @ W0 @ Z.T @ delta_vector)
xi = delta_vector - X1 @ b
## Update weighting matrix
g_ind = Z * xi
obs = np.sum(J)
vg = g_ind.T @ g_ind / obs
## Optimal weighting matrix
Wo = np.linalg.inv(vg)
"""
NFP with optimal weighting matrix
"""
params_optimal = minimize(objective,
params,
args = (product.shares.values, X1, X2, v, demographic, J, M, N, 1e-4, Z, Wo),
method = "Nelder-Mead")
params_estimated = params_optimal.x
time_end = time.time()
elapsed_time = time_end - time_start
elapsed_minutes = int(elapsed_time / 60)
elapsed_seconds = int(elapsed_time % 60)
print("Elapsed time: {}m {}s".format(elapsed_minutes, elapsed_seconds))
Elapsed time: 4m 4s
"""
Print Results
"""
print('-' * 20, " Replication results ", '-' * 20, '\n')
print("Context: \n Length of x vector (k) = ", k, ",\n Number of markets (M) = ", M, ", \n Number of draws (N) = ", N, '\n')
# print("Estimates shares = \n", mkt_share(X2, v, demographic, d.delta, params_estimated[0:(k+1)], params_estimated[(k+1):].reshape(((k+1),(k+1))), J, M, N), "\n") ## the estimated market shares
print("GMM objective value = ", round(objective(params_estimated, product.shares.values, X1, X2, v, demographic, J, M, N, 1e-4, Z, Wo)/np.sum(J), 1), "; Nevo's result = 14.9 \n")
print("alpha(price) = ", round(b[0][0], 3), "; Nevo's result = -32.433 \n")
# print("beta = ", b, "\n") ## the whole vector of linear parameters (alpha, beta), the first element of which is the price coefficient alpha
print("Sigma = \n", params_estimated[0:(k+1)],"\n")
print("Pi = \n", params_estimated[(k+1):].reshape(((k+1),(k+1))),"\n")
--------------------  Replication results  --------------------

Context:
 Length of x vector (k) =  3 ,
 Number of markets (M) =  94 ,
 Number of draws (N) =  20

GMM objective value =  15.4 ; Nevo's result = 14.9

alpha(price) =  -38.072 ; Nevo's result = -32.433

Sigma =
 [2.49897746e-04 9.33870292e+00 2.45505416e-06 3.07782234e-02]

Pi =
 [[ 3.34507418  0.          1.58247038  0.        ]
 [ 1.64308535  0.2381023   0.         11.13164642]
 [-0.27366374  0.          0.02471842  0.        ]
 [ 2.20558565  0.         -1.99445009  0.        ]]
References¶
Berry, S., Levinsohn, J., & Pakes, A. (1995). Automobile prices in market equilibrium. Econometrica: Journal of the Econometric Society, 841-890.
Nevo, A. (2000). A practitioner's guide to estimation of random‐coefficients logit models of demand. Journal of economics & management strategy, 9(4), 513-548.
Berry, S., Levinsohn, J., & Pakes, A. (2004). Differentiated products demand systems from a combination of micro and macro data: The new car market. Journal of political Economy, 112(1), 68-105.
Rasmusen, E. (2007). The BLP Method of Demand Curve Estimation in Industrial Organization.
Train, K. E. (2009). Discrete choice methods with simulation. Cambridge university press.
Dubé, J. P., Fox, J. T., & Su, C. L. (2012). Improving the numerical performance of static and dynamic aggregate discrete choice random coefficients demand estimation. Econometrica, 80(5), 2231-2267.