2013-06-19 2 views
2

Это мой запрос (оптимизация подзапроса с функциями SUM и TOP):

-- Return the account number of every tenant whose two most recent billing
-- summaries (latest bill_date first) have detail amounts that sum to zero.
-- A tenant with no matching detail rows yields a NULL sum and is not returned.
SELECT t.accountno
FROM tenant_info AS t
WHERE 0 = (
    -- Total of all detail lines attached to the tenant's two newest summaries.
    SELECT SUM(d.Bill_Amount)
    FROM billing_summary AS s
    INNER JOIN Billing_Detail AS d
        ON d.Billing_Summary_ID = s.Id
    WHERE s.id IN (
        -- Correlated per tenant: ids of the two summaries with the latest bill_date.
        SELECT TOP 2 recent.Id
        FROM Billing_Summary AS recent
        WHERE recent.Tenant_Info_ID = t.TenantId
        ORDER BY recent.bill_date DESC
    )
)

Количество записей в таблицах исчисляется миллионами, поэтому этот запрос выполняется около 3 минут, и к этому времени приложение уже завершается по тайм-ауту. Я думаю, что третий подзапрос можно написать лучше, но не могу придумать решение.

Есть ли лучшее решение?

+0

Можете выполнить 'explain select (...)' и привести результат здесь? –

+0

Он выбирает два последних счёта (по убыванию даты) конкретного арендатора и, если их сумма равна нулю, возвращает эту запись. – Mustafa

+0

Я использую SQL Server 2008. Я считаю, что третий подзапрос занимает большую часть времени, поскольку ему приходится соединять и отбирать данные среди миллионов записей. – Mustafa

ответ

0

Приведённый выше SQL-запрос от @Mustafa, безусловно, более изящен, но этот вариант работает быстрее.

ПОДГОТОВКА ТЕСТОВЫХ ДАННЫХ:

--/* 
    -- Test-data setup: (re)create the three empty tables used by the rest of
    -- the script. The marker line above is itself only a line comment, so
    -- everything below executes as written.

    -- Suppress the per-statement "rows affected" messages during the bulk load.
    SET NOCOUNT ON;

    -- Tenants: identity key plus an account number.
    IF OBJECT_ID('Tenant_Info', 'U') IS NOT NULL
    BEGIN
        DROP TABLE Tenant_Info;
    END
    GO
    CREATE TABLE dbo.Tenant_Info (
        TenantId  int IDENTITY(1, 1),
        AccountNo varchar(10)
    );
    GO

    -- Billing summaries: one row per tenant bill, carrying the bill date.
    IF OBJECT_ID('Billing_Summary', 'U') IS NOT NULL
    BEGIN
        DROP TABLE Billing_Summary;
    END
    GO
    CREATE TABLE dbo.Billing_Summary (
        Id             int IDENTITY(1, 1),
        Tenant_Info_ID int,
        Bill_Date      date
    );
    GO

    -- Billing details: amount lines that point at a billing summary.
    IF OBJECT_ID('Billing_Detail', 'U') IS NOT NULL
    BEGIN
        DROP TABLE Billing_Detail;
    END
    GO
    CREATE TABLE dbo.Billing_Detail (
        Id                 int IDENTITY(1, 1),
        Billing_Summary_ID int,
        Bill_Amount        decimal(28, 10)
    );
    GO

    -- ====================================================================
    -- Populate Tenant_Info: seed a single row, double the table 20 times
    -- (1 row -> 1,048,576 rows), then derive each account number from the
    -- row's TenantId.
    INSERT INTO Tenant_Info (AccountNo)
    VALUES ('');

    -- Number of doubling passes: 20 -> 1,048,576 rows, 10 -> 1,024 rows.
    DECLARE @Count int = 0;
    WHILE @Count < 20
    BEGIN
        -- Re-insert every existing row, which doubles the row count.
        INSERT INTO Tenant_Info (AccountNo)
        SELECT src.AccountNo
        FROM Tenant_Info AS src;

        SET @Count = @Count + 1;
    END;

    -- Account number = 1000000000 + TenantId, converted to varchar(10).
    UPDATE Tenant_Info
    SET AccountNo = CAST(1000000000 + TenantId AS varchar(10));

    CREATE NONCLUSTERED INDEX idx_Tenant_Info
        ON Tenant_Info (TenantId);
    -- ====================================================================



    -- ====================================================================
    -- CREATE THE Billing_Summary TEST DATA (12 MONTHS FOR EACH TENANT)
    --
    -- Tenant 1 receives its 12 monthly rows first; those rows are then used
    -- as the source that is copied to every other tenant in two passes.

    -- Tenant 1: one summary per month for 12 months, starting 1 August 2012.
    DECLARE @Count2 int; SELECT @Count2 = 0;
    WHILE @Count2 < 12
    BEGIN
        -- '20120801' is the unseparated ISO form, which is read as 1 August 2012
        -- regardless of the session's SET LANGUAGE / SET DATEFORMAT. A
        -- slash-separated literal such as '08/01/2012' is parsed according to the
        -- session date format and becomes 8 January 2012 under dmy.
        INSERT INTO Billing_Summary (Tenant_Info_ID, Bill_Date)
        SELECT 1, DATEADD(MONTH, @Count2, '20120801');

        SELECT @Count2 = (@Count2 + 1);
    END

    -- Ensure that the Max Bill Date has the lowest Billing_Summary Id to
    -- avoid the assumption that the Ids are in order
    DECLARE @MaxBillDate date;
    SELECT @MaxBillDate = MAX(Bill_Date) FROM Billing_Summary;

    -- Pass 1: every tenant except tenant 1 gets the latest bill date first.
    -- At this point Billing_Summary holds only tenant 1's rows, so exactly
    -- one source row matches @MaxBillDate.
    INSERT INTO Billing_Summary (Tenant_Info_ID, Bill_Date)
    SELECT TI.TenantId, BS.Bill_Date
    FROM Tenant_Info TI
    CROSS JOIN Billing_Summary BS
    WHERE TI.TenantId <> 1
      AND BS.Bill_Date = @MaxBillDate;

    -- Pass 2: the remaining dates. Every row added by pass 1 carries
    -- @MaxBillDate, so the <> filter leaves only tenant 1's other rows
    -- as the source.
    INSERT INTO Billing_Summary (Tenant_Info_ID, Bill_Date)
    SELECT TI.TenantId, BS.Bill_Date
    FROM Tenant_Info TI
    CROSS JOIN Billing_Summary BS
    WHERE TI.TenantId <> 1
      AND BS.Bill_Date <> @MaxBillDate;

    CREATE NONCLUSTERED INDEX idx_Billing_Summary_1 ON Billing_Summary (Id, Tenant_Info_ID);
    CREATE NONCLUSTERED INDEX idx_Billing_Summary_2 ON Billing_Summary (Tenant_Info_ID, Bill_Date);

    -- Optional sanity checks (uncomment to run):
    --SELECT COUNT(*) FROM Billing_Summary
    --SELECT COUNT(DISTINCT Tenant_Info_ID) FROM Billing_Summary
    --SELECT COUNT(DISTINCT Bill_Date) FROM Billing_Summary
    --SELECT * FROM Billing_Summary WHERE Tenant_Info_ID = 1
    -- ====================================================================



    -- ====================================================================
    -- Populate Billing_Detail: two detail lines per summary, then zero out the
    -- bills on the two latest dates for a random half of the tenants.

    -- Detail line 1 of 2; the tenant id is used as the amount.
    INSERT INTO Billing_Detail (Billing_Summary_ID, Bill_Amount)
    SELECT summ.Id, summ.Tenant_Info_ID
    FROM Billing_Summary AS summ;

    -- Detail line 2 of 2 (same values).
    INSERT INTO Billing_Detail (Billing_Summary_ID, Bill_Amount)
    SELECT summ.Id, summ.Tenant_Info_ID
    FROM Billing_Summary AS summ;

    -- Latest and second-latest bill dates across the whole table.
    DECLARE @MaxBillDateB date, @2ndMaxBillDateB date;
    SET @MaxBillDateB = (SELECT MAX(Bill_Date) FROM Billing_Summary);
    SET @2ndMaxBillDateB = (
        SELECT MAX(Bill_Date)
        FROM Billing_Summary
        WHERE Bill_Date <> @MaxBillDateB
    );

    -- Set the amount to 0 on every detail line of the chosen tenants' bills
    -- dated on either of those two dates.
    UPDATE det
    SET det.Bill_Amount = 0
    FROM Billing_Detail AS det
    INNER JOIN Billing_Summary AS summ
        ON summ.Id = det.Billing_Summary_ID
    INNER JOIN (
        -- Random 50 percent of the tenants that have at least one detail row;
        -- ORDER BY NEWID() shuffles the list before TOP takes its half.
        SELECT TOP 50 PERCENT tenants.Tenant_Info_ID
        FROM (
            SELECT DISTINCT s2.Tenant_Info_ID
            FROM Billing_Detail AS d2
            INNER JOIN Billing_Summary AS s2
                ON s2.Id = d2.Billing_Summary_ID
        ) AS tenants
        ORDER BY NEWID()
    ) AS picked
        ON picked.Tenant_Info_ID = summ.Tenant_Info_ID
    WHERE summ.Bill_Date IN (@2ndMaxBillDateB, @MaxBillDateB);

    CREATE NONCLUSTERED INDEX idx_Billing_Detail_1
        ON Billing_Detail (Id);
    CREATE NONCLUSTERED INDEX idx_Billing_Detail_2
        ON Billing_Detail (Billing_Summary_ID);

    -- Optional sanity check (uncomment to list tenants that have zeroed detail lines):
    -- SELECT BS.Tenant_Info_ID, SUM(BD.Bill_Amount) FROM Billing_Detail BD JOIN Billing_Summary BS ON BD.Billing_Summary_ID = BS.Id WHERE BD.Bill_Amount = 0 GROUP BY BS.Tenant_Info_ID
    -- ====================================================================

    -- Restore the "rows affected" messages now that the load is done.
    SET NOCOUNT OFF;
    --*/ 

СКРИПТ ВЫПОЛНЕНИЯ ЗАПРОСА:

/*   
    -- ORIGINAL QUERY TAKING 3 MINUTES BY THE QUESTION ASKER 
    -- Kept inside a block comment as a disabled baseline for timing comparison.
    -- To run it, turn the opening marker on the first line into a line comment
    -- by prefixing it with two dashes; the closing marker below is already prefixed.
    SELECT COUNT(*) -- On my system this runs in about ~25 seconds for a 524,288 row result 
     FROM (
       select t.accountno from tenant_info t where 
       (
        select sum(d.Bill_Amount) from billing_summary s , Billing_Detail d 
        where s.Id=d.Billing_Summary_ID and s.id in 
         (select top 2 Id from Billing_Summary where Tenant_Info_ID = t.TenantId 
         order by bill_date desc) 
       ) = 0 
      ) A 
    --*/ 


    -- Latest bill date per tenant, taken over all of Billing_Summary.
    IF OBJECT_ID('tempdb..#Tenant_MaxBillDate', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_MaxBillDate;
    END;

    CREATE TABLE #Tenant_MaxBillDate (
        Tenant_Info_ID int,
        Bill_Date      date
    );

    INSERT INTO #Tenant_MaxBillDate (Tenant_Info_ID, Bill_Date)
    SELECT summ.Tenant_Info_ID, MAX(summ.Bill_Date)
    FROM Billing_Summary AS summ
    GROUP BY summ.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_MaxBillDate
        ON #Tenant_MaxBillDate (Tenant_Info_ID, Bill_Date);


    -- Second-latest bill date per tenant: the greatest Bill_Date that remains
    -- once the rows on each tenant's latest date (from #Tenant_MaxBillDate)
    -- are excluded. Tenants with only one distinct bill date get no row here.
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillDate', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_2ndToMaxBillDate;
    END;

    CREATE TABLE #Tenant_2ndToMaxBillDate (
        Tenant_Info_ID int,
        Bill_Date      date
    );

    INSERT INTO #Tenant_2ndToMaxBillDate (Tenant_Info_ID, Bill_Date)
    SELECT summ.Tenant_Info_ID, MAX(summ.Bill_Date)
    FROM Billing_Summary AS summ
    WHERE NOT EXISTS (
        -- Skip summaries dated on the tenant's latest bill date.
        SELECT 1
        FROM #Tenant_MaxBillDate AS latest
        WHERE latest.Tenant_Info_ID = summ.Tenant_Info_ID
          AND latest.Bill_Date = summ.Bill_Date
    )
    GROUP BY summ.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillDate
        ON #Tenant_2ndToMaxBillDate (Tenant_Info_ID, Bill_Date);


    -- For each tenant, the highest summary Id among the summaries dated on
    -- that tenant's latest bill date. Going through the date avoids the
    -- incorrect assumption that the highest summary Id also has the latest date.
    IF OBJECT_ID('tempdb..#Tenant_MaxBillSummary', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_MaxBillSummary;
    END;

    CREATE TABLE #Tenant_MaxBillSummary (
        Tenant_Info_ID     int,
        Billing_Summary_ID int
    );

    INSERT INTO #Tenant_MaxBillSummary (Tenant_Info_ID, Billing_Summary_ID)
    SELECT summ.Tenant_Info_ID, MAX(summ.Id)
    FROM #Tenant_MaxBillDate AS latest
    INNER JOIN Billing_Summary AS summ
        ON summ.Tenant_Info_ID = latest.Tenant_Info_ID
       AND summ.Bill_Date = latest.Bill_Date
    GROUP BY summ.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_MaxBillSummary
        ON #Tenant_MaxBillSummary (Tenant_Info_ID, Billing_Summary_ID);


    -- Covers tenants with more than one summary on their latest bill date:
    -- the highest summary Id on that date other than the one already recorded
    -- in #Tenant_MaxBillSummary. Tenants with a single summary on that date
    -- produce no row.
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillSummary_SameDate', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_2ndToMaxBillSummary_SameDate;
    END;

    CREATE TABLE #Tenant_2ndToMaxBillSummary_SameDate (
        Tenant_Info_ID     int,
        Billing_Summary_ID int
    );

    INSERT INTO #Tenant_2ndToMaxBillSummary_SameDate (Tenant_Info_ID, Billing_Summary_ID)
    SELECT summ.Tenant_Info_ID, MAX(summ.Id)
    FROM Billing_Summary AS summ
    INNER JOIN #Tenant_MaxBillDate AS latest
        ON latest.Tenant_Info_ID = summ.Tenant_Info_ID
       AND latest.Bill_Date = summ.Bill_Date
    LEFT JOIN #Tenant_MaxBillSummary AS top1
        ON top1.Billing_Summary_ID = summ.Id
       AND top1.Tenant_Info_ID = summ.Tenant_Info_ID
    -- Anti-join: keep only summaries that are not the tenant's top pick.
    WHERE top1.Billing_Summary_ID IS NULL
    GROUP BY summ.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillSummary_SameDate
        ON #Tenant_2ndToMaxBillSummary_SameDate (Tenant_Info_ID, Billing_Summary_ID);


    -- Expected to be the common case: the tenant's second summary sits on the
    -- second-latest bill date. Takes the highest summary Id on that date, but
    -- only for tenants that did not already get a second pick on their latest date.
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillSummary_2ndDate', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_2ndToMaxBillSummary_2ndDate;
    END;

    CREATE TABLE #Tenant_2ndToMaxBillSummary_2ndDate (
        Tenant_Info_ID     int,
        Billing_Summary_ID int
    );

    INSERT INTO #Tenant_2ndToMaxBillSummary_2ndDate (Tenant_Info_ID, Billing_Summary_ID)
    SELECT summ.Tenant_Info_ID, MAX(summ.Id)
    FROM Billing_Summary AS summ
    INNER JOIN #Tenant_2ndToMaxBillDate AS second_date
        ON second_date.Tenant_Info_ID = summ.Tenant_Info_ID
       AND second_date.Bill_Date = summ.Bill_Date
    WHERE NOT EXISTS (
              -- Do not choose the same summary ID as the tenant's top pick.
              SELECT 1
              FROM #Tenant_MaxBillSummary AS top1
              WHERE top1.Billing_Summary_ID = summ.Id
          )
      AND NOT EXISTS (
              -- Do not choose a tenant that has two summary entries on their max date.
              SELECT 1
              FROM #Tenant_2ndToMaxBillSummary_SameDate AS same_day
              WHERE same_day.Tenant_Info_ID = summ.Tenant_Info_ID
          )
    GROUP BY summ.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillSummary_2ndDate
        ON #Tenant_2ndToMaxBillSummary_2ndDate (Tenant_Info_ID, Billing_Summary_ID);


    -- Combine the three per-tenant pick lists (top summary, second summary on
    -- the same date, second summary on the second-latest date) into one table
    -- of selected summary ids.
    IF OBJECT_ID('tempdb..#Tenant_Top2BillSummaryRecords', 'U') IS NOT NULL
    BEGIN
        DROP TABLE #Tenant_Top2BillSummaryRecords;
    END;

    CREATE TABLE #Tenant_Top2BillSummaryRecords (
        Tenant_Info_ID     int,
        Billing_Summary_ID int
    );

    INSERT INTO #Tenant_Top2BillSummaryRecords (Tenant_Info_ID, Billing_Summary_ID)
    SELECT newest.Tenant_Info_ID, newest.Billing_Summary_ID
    FROM #Tenant_MaxBillSummary AS newest
    UNION ALL
    SELECT same_day.Tenant_Info_ID, same_day.Billing_Summary_ID
    FROM #Tenant_2ndToMaxBillSummary_SameDate AS same_day
    UNION ALL
    SELECT prior_day.Tenant_Info_ID, prior_day.Billing_Summary_ID
    FROM #Tenant_2ndToMaxBillSummary_2ndDate AS prior_day;

    -- One index per lookup direction: by tenant and by summary id.
    CREATE NONCLUSTERED INDEX idx_#Tenant_Top2BillSummaryRecords
        ON #Tenant_Top2BillSummaryRecords (Tenant_Info_ID, Billing_Summary_ID);
    CREATE NONCLUSTERED INDEX idx_#Tenant_Top2BillSummaryRecords2
        ON #Tenant_Top2BillSummaryRecords (Billing_Summary_ID, Tenant_Info_ID);


    -- Count the tenants whose selected summaries (from
    -- #Tenant_Top2BillSummaryRecords) have detail amounts that sum to zero.
    -- Author's timing note: on my system this runs in about ~14 seconds for a
    -- 524,288 row result.
    SELECT COUNT(*)
    FROM (
        SELECT tenant.TenantId, tenant.AccountNo
        FROM #Tenant_Top2BillSummaryRecords AS top2
        INNER JOIN Tenant_Info AS tenant
            ON tenant.TenantId = top2.Tenant_Info_ID
        INNER JOIN Billing_Summary AS summ
            ON summ.Id = top2.Billing_Summary_ID
        INNER JOIN Billing_Detail AS det
            ON det.Billing_Summary_ID = summ.Id
        GROUP BY tenant.TenantId, tenant.AccountNo
        HAVING SUM(det.Bill_Amount) = 0
    ) AS zero_balance_tenants;
Смежные вопросы