2017-01-19 3 views
5

Все,Ресурсы Превышены во время выполнения запроса. BigQuery

, которые я пытался получить некоторые из нашего большого кода аналитики, проходящей через BigQuery, но я по-прежнему столкнуться с проблемами для многих стран и объема данных, который присутствует. Мы говорим о годах данных. Может быть, мой запрос не соответствует вкусу, но попытка получить суммы, основанные на конкретной группе, - это то, что мне нужно.

Что мне нужно изменить в запросе, чтобы предотвратить превышение ресурсов?

SELECT 
    COMPANY_NAME, 
    RATING_CLASS, 
    COMPANY_KEY, 
    -- State Info & Calculations 
    -- Over is used as a WINDOW function to SUM ALL results within the given query 
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 Info & Calculations 
    ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 Info & Calculations 
    ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
FROM (
    SELECT 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    -- ZIP3 
    ZIP3, 
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE 
     WHEN lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK, 
    SUM(CASE 
     WHEN top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT, 
    SUM(CASE 
     WHEN top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF, 
    SUM(CASE 
     WHEN DISCOUNTED_lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK, 
    SUM(CASE 
     WHEN DISCOUNTED_top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    ZIP5, 
    COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE 
     WHEN lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK, 
    SUM(CASE 
     WHEN top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT, 
    SUM(CASE 
     WHEN top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF, 
    SUM(CASE 
     WHEN DISCOUNTED_lowest = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK, 
    SUM(CASE 
     WHEN DISCOUNTED_top5 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_top10 = TRUE THEN 1 
     ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    LOWEST, 
    RATIO_TO_MIN, 
    RATE_ORDER, 
    TOP5, 
    TOP10, 
    CENT_DIFF, 
    DISCOUNTED_LOWEST, 
    DISCOUNTED_RATIO_TO_MIN, 
    DISCOUNTED_RATE_ORDER, 
    DISCOUNTED_TOP5, 
    DISCOUNTED_TOP10, 
    DISCOUNTED_CENT_DIFF, 
    LOGGING_KEY) 
GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    -- ZIP3 General 
    ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK, 
    ZIP3_TOP5, 
    ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 General 
    ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK, 
    ZIP5_TOP5, 
    ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF 

Обновленный запрос с предложенными исправлениями:

SELECT 
    main.COMPANY_NAME AS COMPANY_NAME, 
    main.COMPANY_KEY AS COMPANY_KEY, 
    main.RATING_CLASS AS RATING_CLASS, 
    state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 
    main.ZIP3 AS ZIP3, 
    ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    main.ZIP5 AS ZIP5, 
    ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
FROM (
    SELECT 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    STATE, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824", 
     "4872666167115776", 
     "6396348765044736", 
     "6139303562313728", 
     "4988973881491456") 
    AND portal_key NOT IN ("5878607637381120") 
    GROUP BY 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ZIP3, 
    ZIP5, 
    STATE, 
    ) AS MAIN 
LEFT JOIN (
    SELECT 
    ZIP3, 
    COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    COMPANY_KEY, 
    RATING_CLASS, 
    AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_TOP5_COUNT, 
    SUM(CASE 
     WHEN LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_LOWEST_COUNT, 
    SUM(CASE 
     WHEN TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP3_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF, 
    AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP3, 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ) AS ZIP3_SUB 
ON 
    main.ZIP3 = ZIP3_SUB.ZIP3 
    AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY 
    AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS 
LEFT JOIN (
    SELECT 
    ZIP3, 
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP3) AS ZIP3_COUNT 
ON 
    main.ZIP3 = ZIP3_COUNT.ZIP3 
LEFT JOIN (
    SELECT 
    ZIP5, 
    COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    COMPANY_KEY, 
    RATING_CLASS, 
    AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_TOP5_COUNT, 
    SUM(CASE 
     WHEN LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_LOWEST_COUNT, 
    SUM(CASE 
     WHEN TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP5_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF, 
    AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP5 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_LOWEST = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    SUM(CASE 
     WHEN DISCOUNTED_TOP10 = TRUE THEN 1 
     ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK, 
    AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP5, 
    COMPANY_NAME, 
    COMPANY_KEY, 
    RATING_CLASS, 
    ) AS ZIP5_SUB 
ON 
    main.ZIP5 = ZIP5_SUB.ZIP5 
    AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY 
    AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS 
LEFT JOIN (
    SELECT 
    ZIP5, 
    EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    ZIP5) AS ZIP5_COUNT 
ON 
    main.ZIP5 = ZIP5_COUNT.ZIP5 
LEFT JOIN (
    SELECT 
    STATE, 
    EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE 
    FROM 
    [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE 
    SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", 
     "6277136540237824") 
    GROUP BY 
    STATE) AS STATE_COUNT 
ON 
    main.STATE = STATE_COUNT.STATE 

Объяснение: В связи с тем BigQuery является общим ресурсом, BigQuery аппроксимирует количество ресурсов, что запрос будет требовать и выделять соответствующие ресурсы , Разбирая запрос на несколько объединений в одной таблице, вычисление может быть выполнено за счет большего количества ресурсов, чем первоначально предполагалось. Для более подробного объяснения того, почему ограничение запроса работает хорошо, см. Иордания Тигани о реакции переполнения стека here.

+0

просто интересно, сколько гигабайтов обрабатывает этот запрос примерно? –

+0

У меня есть некоторые идеи, почему ваш запрос так дорог. Одна из причин - потенциально, потому что она сильно перепроектирована. Но без ясного представления о том, что именно (логический мудрый) вы пытаетесь достичь здесь - трудно сделать окончательный вывод и легко ошибиться в суждениях. Я предлагаю вам дать нам подробное объяснение того, что вы пытаетесь получить в этом запросе, поэтому мы сможем помочь вам не быть слепыми, как сейчас: o) –

ответ

2

Как-то я чувствую, что это то, что вам нужно. Я могу ошибаться, поскольку это действительно только относительно слепая попытка перепроектировать свою логику, так что не судите сильно, если я ошибаюсь здесь
Не может проверить это, но я чувствую, что это будет дешево на самом деле добиться успеха

SELECT 
    main.COMPANY_NAME AS COMPANY_NAME, 
    main.COMPANY_KEY AS COMPANY_KEY, 
    main.RATING_CLASS AS RATING_CLASS, 
    SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER() AS STATE_MED_SUPP_TOOL_NUM_QUOTE, 
    -- ZIP3 
    main.ZIP3 AS ZIP3, 
    ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP3_TOTAL_RESULT_APPEARANCE, 
    ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST, 
    ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST, 
    ZIP3_AVG_RANK AS ZIP3_AVG_RANK, 
    ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5, 
    ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10, 
    ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF, 
    ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK, 
    ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5, 
    ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10, 
    ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    -- ZIP5 
    main.ZIP5 AS ZIP5, 
    ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE, 
    ZIP5_TOTAL_RESULT_APPEARANCE, 
    ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST, 
    ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST, 
    ZIP5_AVG_RANK AS ZIP5_AVG_RANK, 
    ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5, 
    ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10, 
    ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF, 
    ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK, 
    ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5, 
    ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10, 
    ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF 
FROM (
    SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5 
) AS main 
LEFT JOIN (
    SELECT 
    ZIP3, company_key, rating_class, 
    COUNT(*)             AS ZIP3_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)   AS ZIP3_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP3_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP3_AVG_RANK, 
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)    AS ZIP3_TOP5_COUNT, 
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)    AS ZIP3_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP3_AVG_CENT_DIFF, 
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP3_DISCOUNTED_AVG_RANK, 
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP3_DISCOUNTED_AVG_CENT_DIFF, 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
     AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
     AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP3, company_key, rating_class 
) AS zip3_sub 
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class 
LEFT JOIN ( 
    SELECT 
    ZIP5, company_key, rating_class, 
    COUNT(*)             AS ZIP5_TOTAL_RESULT_APPEARANCE, 
    SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END)   AS ZIP5_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP5_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP5_AVG_RANK, 
    SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END)    AS ZIP5_TOP5_COUNT, 
    SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END)    AS ZIP5_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP5_AVG_CENT_DIFF, 
    SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT, 
    AVG(discounted_ratio_to_min)        AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST, 
    AVG(discounted_rate_order)        AS ZIP5_DISCOUNTED_AVG_RANK, 
    SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT, 
    SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT, 
    AVG(discounted_cent_diff)         AS ZIP5_DISCOUNTED_AVG_CENT_DIFF, 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP5, company_key, rating_class 
) AS zip5_sub 
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class 
LEFT JOIN (
    SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP3 
) AS zip3_count 
ON main.ZIP3 = zip3_count.ZIP3 
LEFT JOIN (
    SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE 
    FROM [csgapi:qh_med_supp_tool.v2_TX] 
    WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z') 
    AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z') 
    AND user_key NOT IN ("6522869941010432", "6277136540237824") 
    GROUP BY ZIP5 
) AS zip5_count 
ON main.ZIP5 = zip5_count.ZIP5 

Кроме того, обратите внимание: в функции BigQuery Legacy SQL - COUNT(DISTINCT) функция вероятностна - дает статистическую аппроксимацию и не гарантируется точность.
Вы можете использовать EXACT_COUNT_DISTINCT() функцию вместо - это один дает точное число, но немного дороже на заднем конце

И конечно весь запрос может быть переписан для BigQuery Standard SQL при COUNT (DISTINCT) производит точное количество и стандарт SQL является предпочтительнее использовать в рекомендации команды BigQuery Team

+0

WOW. Вы просто взорвали мой разум. Я предполагаю, что с точки зрения BigQuery и того, как они распределяют ресурсы, это имеет смысл. Спасибо, что поделились этим с нами. Я опубликовал обновленный запрос для учета изменений, которые вы предложили. Это работает. Среднее время работы от 86 секунд до 30 секунд. – SmittySmee

Смежные вопросы