"""
Preliminaries
"""
# !pip install pandas pyblp numba
import pandas as pd
import numpy as np
from scipy.optimize import minimize
import pyblp
from numba import jit, njit, prange
import time
import warnings
warnings.filterwarnings("ignore", category=Warning) ## Supress all warnings

"""
BLP Estimation

- x1: non-price product attributes (linear)
- x2: non-price product attributes (nonlinear)
- p: price
- v: monte carlo draws for nu
- demographic: consumer demographics

- J: vector of number of goods per market
- M: number of markets
- N: number of draws

- delta: mean utility
- sigma: Sigma for nonlinear parameters
- pi: Pi for nonlinear parameters
"""

class DeltaManager:
    """
    Manages delta values throughout the estimation process
    """
    def __init__(self, delta):
        self.delta = np.array(delta)

class BLPEstimator:
    def __init__(self):
        self.time_start = time.time()
        self._initialize_data()
        self._setup_model()
        
    def _initialize_data(self):
        
        """
        Read in data
        """
        ## Product data
        self.product = pd.read_csv(pyblp.data.NEVO_PRODUCTS_LOCATION)
        self.product["cons"] = 1
        
        ## Consumer data
        self.consumer = pd.read_csv(pyblp.data.NEVO_AGENTS_LOCATION)
        self.demographic = self.consumer[["income", "income_squared", "age", "child"]].values
        
        """
        Some data cleansing
        """
        ## Market structure
        self.J = self.product.groupby("market_ids").sum(numeric_only=True).cons.values ## Number of goods per market
        self.M = len(self.J) ## Number of markets
        self.N = 20  ## Number of simulations per market (Accorigin to Nevo, 2000, page 538)

    def _setup_model(self):
        
        """
        We specify the regressors in $\bar{V}$ (linear) and $\tilde{V}$ (non-linear). The two sets of regressors are not necessarily the same, because we may not want random coefficients (the $\beta_i$s) over all the regressors.
        """
        ## The (linear) regressors in V_bar
        self.X1 = np.hstack((self.product[["prices"]], 
                            pd.get_dummies(self.product["product_ids"]))) 
        ## The (non-linear) regressors in V_tilde, over which we want random coefficients (so no fixed effects)
        self.X2 = self.product[["cons", "prices", "sugar", "mushy"]].values

        ## Number of nonlinear parameters (k=3 because we have "prices", "sugar", "mushy")
        self.k = self.X2.shape[1] - 1
        
        ## Insruments
        iv_cols = [col for col in self.product.columns if 'demand' in col]
        self.Z = np.hstack((self.product[iv_cols], 
                           pd.get_dummies(self.product["product_ids"])))
        
        
        """ 
        Monte carlo simulation for nu{i}
        - k+1: k from x, 1 from p
        - M: different draws for each market
        - N: number of draws
        """
        np.random.seed(0)
        self.v = np.reshape(np.random.standard_normal((self.k + 1) * self.N * self.M), (self.M * self.N, self.k + 1))
        
        """
        Delta
        """
        ## Shares of the outside good (base category) (See Nevo, 2000, page 520)
        self.product["outside"] = self.product["shares"].groupby(
            self.product["market_ids"]).transform("sum")
        
        ## Initial delta: log(share) - log(share(outside good)) (normalization)
        delta_0 = np.log(self.product["shares"]) - np.log(self.product["outside"])

        ## Initialize a delta object using delta_0
        self.d = DeltaManager(delta_0)

    @staticmethod
    @njit(parallel=True)
    def _emax_iter(out, x2, v, demographic, delta, sigma, pi, J, M, N):
        """
        Parallel computation of (deterministic) utility elements
        """
        for i in prange(N):
            mj = 0
            for m in prange(M):
                mktSize = J[m]
                for j in prange(mktSize):
                    out[mj, i] = delta[mj] + \
                        x2[mj, 0] * (v[N * m + i, 0] * sigma[0] +
                                    np.dot(pi[0,:], demographic[N * m + i,:])) + \
                        x2[mj, 1] * (v[N * m + i, 1] * sigma[1] + 
                                    np.dot(pi[1,:], demographic[N * m + i,:])) + \
                        x2[mj, 2] * (v[N * m + i, 2] * sigma[2] + 
                                    np.dot(pi[2,:], demographic[N * m + i,:])) + \
                        x2[mj, 3] * (v[N * m + i, 3] * sigma[3] + 
                                    np.dot(pi[3,:], demographic[N * m + i,:]))
                    mj += 1
        return out

    @staticmethod
    @jit(forceobj=True)
    def _indirect_utility(x2, v, demographic, delta, sigma, pi, J, M, N):
        """
        Compute indirect utility matrix
        """
        sigma = np.abs(sigma) ## positive Sigma
        out = np.zeros((sum(J), N)) ## initialization
        return BLPEstimator._emax_iter(out, x2, v, demographic, delta, sigma, pi, J, M, N)
    
    @staticmethod
    @jit(forceobj=True)
    def _mkt_share(x2, v, demographic, delta, sigma, pi, J, M, N):
        """
        Compute market shares
        - q: quantities consumed
        - s: market shares
        """
        q = np.zeros((np.sum(J), N)) ## Quantities bought (initialization)
        u = BLPEstimator._indirect_utility(x2, v, demographic, delta, sigma, pi, J, M, N) ## Indirect utilities
        exp_u = np.exp(u) ## Exponentiate the utilities
        first_good = 0 ## Outside good

        for m in range(M):
            mktSize = J[m] ## market size of market m
            numer = exp_u[first_good:first_good + mktSize,:] ## numerator for shares
            denom = 1 + numer.sum(axis=0) ## denominator for shares
            q[first_good:first_good + mktSize,:] = numer/denom ## quantity(ijm)
            first_good += mktSize
        
        s = np.matmul(q, np.repeat(1/N, N)) ## Shares. We use equal weight for all goods.
        return [q, s]
    
    @staticmethod
    @jit(forceobj=True)
    def _solve_delta(s, x2, v, demographic, delta, sigma, pi, J, M, N, tol):
        """
        Solve delta using the iterative process / contraction mapping
        """
        delta_old = delta.copy()
        diff = 1
        
        while diff > tol:
            sigma_jm = BLPEstimator._mkt_share(x2, v, demographic, delta_old, sigma, pi, J, M, N)[1] ## The estimated shares
            delta_new = delta_old + np.log(s/sigma_jm) ## Contraction mapping
            diff = np.max(np.abs(delta_new - delta_old)) ## Update the difference between iterates
            delta_old = delta_new.copy()
        
        return delta_old
    
    def _objective(self, params, s, x1, x2, v, demographic, J, M, N, tol, Z, W):
        """
        Calculate value of GMM objective function
        """
        ## Initialization: theta_0
        sigma = params[0:4]
        pi = params[4:].reshape((4,4))
        pi[[0,2,3],1] = 0
        pi[[0,2,3],3] = 0
        pi[1,2] = 0
        
        if np.min(sigma) < 0:
            return 1e20
        
        ## Inner loop
        self.d.delta = self._solve_delta(s, x2, v, demographic, self.d.delta, sigma, pi, J, M, N, tol)
        delta_vector = self.d.delta.reshape((-1,1))
        
        ## Linear parameters (alpha, beta) given theta_0, FOC
        b = np.linalg.inv(x1.T @ Z @ W @ Z.T @ x1) @ (x1.T @ Z @ W @ Z.T @ delta_vector)

        xi = delta_vector - x1 @ b ## Error term \xi
        g = Z.T @ xi / np.size(xi, axis=0)
        
        return float(np.sum(J) ** 2 * g.T @ W @ g) ## np.sum(J): Number of obs, JxM
    
    def estimate(self):
        """
        Perform BLP estimation
        - We choose the same starting values for theta and W as Nevo (2000)
        """
        ## Initial values for theta
        sigma = [.377, 1.848, 0.004, 0.081]
        pi1 = [3.09, 0, 1.186, 0]
        pi2 = [16.6, -.66, 0, 11.6]
        pi3 = [-0.193, 0, 0.03, 0]
        pi4 = [1.468, 0, -1.5, 0]

        params = np.hstack((sigma, pi1, pi2, pi3, pi4))
        
        ## Initial weighting matrix
        W0 = np.linalg.inv(self.Z.T @ self.Z) 

        ## Get the optimal weight matrix
        params_init_wt = minimize(self._objective, params,
                                args=(self.product.shares.values, self.X1, self.X2,
                                    self.v, self.demographic, self.J, self.M,
                                    self.N, 1e-4, self.Z, W0),
                                method="Nelder-Mead")
        
        params_2 = params_init_wt.x
        sigma_2 = params_2[0:4]
        pi_2 = params_2[4:].reshape((4,4))
        
        self.d.delta = self._solve_delta(self.product.shares.values, self.X2,
                                    self.v, self.demographic, self.d.delta,
                                    sigma_2, pi_2, self.J, self.M, self.N, 1e-4) ## Inner loop
        delta_vector = self.d.delta.reshape((-1,1))
        
        b = np.linalg.inv(self.X1.T @ self.Z @ W0 @ self.Z.T @ self.X1) @ \
            (self.X1.T @ self.Z @ W0 @ self.Z.T @ delta_vector)
        xi = delta_vector - self.X1 @ b
        
        ## Update weight matrix
        g_ind = self.Z * xi
        vg = g_ind.T @ g_ind / np.sum(self.J)

        ## Optimal weighting matrix
        Wo = np.linalg.inv(vg)
        
        ## NFXP with optimal weight matrix
        params_optimal = minimize(self._objective, params,
                                args=(self.product.shares.values, self.X1, self.X2,
                                    self.v, self.demographic, self.J, self.M,
                                    self.N, 1e-4, self.Z, Wo),
                                method="Nelder-Mead")
        
        params_estimated = params_optimal.x
        
        ## Calculate final results
        final_obj = self._objective(params_estimated, self.product.shares.values,
                                self.X1, self.X2, self.v, self.demographic,
                                self.J, self.M, self.N, 1e-4, self.Z, Wo)
        
        delta_vector = self.d.delta.reshape((-1,1))
        b = np.linalg.inv(self.X1.T @ self.Z @ Wo @ self.Z.T @ self.X1) @ \
            (self.X1.T @ self.Z @ Wo @ self.Z.T @ delta_vector)
        
        return params_estimated, b, final_obj
    
    def print_results(self, params_estimated, b, final_obj):
        elapsed_time = time.time() - self.time_start
        elapsed_minutes = int(elapsed_time / 60)
        elapsed_seconds = int(elapsed_time % 60)
        
        print("Elapsed time: {}m {}s \n".format(elapsed_minutes, elapsed_seconds))
        print('-' * 20, " Replication results ", '-' * 20, '\n')
        print("Context: \n Length of x vector (k) = ", self.k,
            ",\n Number of markets (M) = ", self.M,
            ", \n Number of draws (N) = ", self.N, '\n')
        print("GMM objective value = ", round(final_obj/np.sum(self.J), 1), "; Nevo's result = 14.9 \n")
        print("alpha(price) = ", round(b[0][0], 3), "; Nevo's result = -32.433 \n")
        print("Sigma = \n", params_estimated[0:(self.k+1)], "\n")
        print("Pi = \n", params_estimated[(self.k+1):].reshape(((self.k+1), (self.k+1))), "\n")

"""
Usage
"""
if __name__ == "__main__":
    estimator = BLPEstimator()
    params_estimated, b, final_obj = estimator.estimate()
    estimator.print_results(params_estimated, b, final_obj)

Elapsed time: 0m 35s 

--------------------  Replication results  -------------------- 

Context: 
 Length of x vector (k) =  3 ,
 Number of markets (M) =  94 , 
 Number of draws (N) =  20 

GMM objective value =  15.4 ; Nevo's result = 14.9 

alpha(price) =  -36.908 ; Nevo's result = -32.433 

Sigma = 
 [2.49897746e-04 9.33870292e+00 2.45505416e-06 3.07782234e-02] 

Pi = 
 [[ 3.34507418  0.          1.58247038  0.        ]
 [ 1.64308535  0.2381023   0.         11.13164642]
 [-0.27366374  0.          0.02471842  0.        ]
 [ 2.20558565  0.         -1.99445009  0.        ]]

The BLP Method of Demand Curve Estimation¶

Model¶

Empirical Strategy¶

The Endogeneity of Price¶

The Choice Model and Market Shares¶

Utility Parameters¶

Market Shares and Mean Utilities¶

The Nested Fixed Point Algorithm (NFXP)¶

Estimation¶

Resources¶

Steps¶

Example: Replication Nevo (2000)¶

References¶