Агрегация ElasticSearch всеми токенами в поле строки

Я использую ElasticSearch 2.4 и пытаюсь выполнить агрегацию по текстовому полю типа String, которое содержит несколько токенов. Речь идёт о поле адреса с именем mailingAddress. Например, ниже приведён запрос, который ищет NY в поле адреса, и несколько его результатов.

{ 
    "from": 0, 
    "size": 100, 
    "sort": [ 
    { 
     "_score": { 
     "order": "desc" 
     } 
    } 
    ], 
    "query": { 
    "bool": { 
     "must": [ 
     { 
      "bool": { 
      "must": [ 
       { 
       "match": { 
        "customerprofile.mailingAddress": { 
        "query": "NY", 
        "fuzziness": 0, 
        "operator": "or" 
        } 
       } 
       }, 
       { 
       "match": { 
        "customerprofile.companyId": { 
        "query": "999", 
        "fuzziness": 0, 
        "operator": "or" 
        } 
       } 
       } 
      ] 
      } 
     } 
     ] 
    } 
    } 
}

возвращает

"hits":[ 
    { 
     "_index":"wht_index_prod_v33_es24", 
     "_type":"customerprofile", 
     "_id":"2044", 
     "_score":2.9787974, 
     "_source":{ 
     "customerId":2044, 
     "companyId":2007, 
     "fullName":"John Doe", 
     "email":"[email protected]", 
     "pictureURL":"john.png", 
     "profilePictureContentType":"image/png", 
     "phone":"(703) 999-8888", 
     "mailingAddress":"100 Lake Braddock Drive\nBurke, NY 22015", 
     "gender":"Male", 
     "emergencyContactsIds":[ 

     ], 
     "wantCorrespondence":false 
     } 
    }, 
    { 
     "_index":"wht_index_prod_v33_es24", 
     "_type":"customerprofile", 
     "_id":"2045", 
     "_score":2.9787974, 
     "_source":{ 
     "customerId":2045, 
     "companyId":2007, 
     "fullName":"Jane Anderson", 
     "email":"[email protected]", 
     "pictureURL":"JAnderson.png", 
     "profilePictureContentType":"image/png", 
     "phone":"(434) 111-2345", 
     "mailingAddress":"PO Box 333, Boydton, NY 23917", 
     "gender":"Male", 
     "emergencyContactsIds":[ 

     ], 
     "wantCorrespondence":false 
     } 
    }, 
.. 
.. 
]

Вопрос
Выполняя агрегацию по mailingAddress, я ожидал увидеть корзины (buckets) для каждого слова в текстовом поле. Судя по приведённым выше результатам поиска, я ожидал найти и корзину с ключом «NY», но её нет. Может ли кто-нибудь объяснить почему? Полагаю, у неё слишком мало записей?

Агрегация:

{ 
    "size": 0, 
    "aggs": { 
    "group_by_age": { 
     "terms": { 
     "field": "mailingAddress" 
     }, 
     "aggs": { 
     "group_by_gender": { 
      "terms": { 
      "field": "gender" 
      } 
     } 
     } 
    } 
    } 
}

Результаты агрегации:

{ 
    "took": 16, 
    "timed_out": false, 
    "_shards": { 
    "total": 1, 
    "successful": 1, 
    "failed": 0 
    }, 
    "hits": { 
    "total": 401, 
    "max_score": 0, 
    "hits": [ 

    ] 
    }, 
    "aggregations": { 
    "group_by_age": { 
     "doc_count_error_upper_bound": 0, 
     "sum_other_doc_count": 1041, 
     "buckets": [ 
     { 
      "key": "st", 
      "doc_count": 30, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 17 
       }, 
       { 
       "key": "male", 
       "doc_count": 13 
       } 
      ] 
      } 
     }, 
     { 
      "key": "ca", 
      "doc_count": 28, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 21 
       }, 
       { 
       "key": "male", 
       "doc_count": 7 
       } 
      ] 
      } 
     }, 
     { 
      "key": "dr", 
      "doc_count": 16, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 13 
       }, 
       { 
       "key": "male", 
       "doc_count": 3 
       } 
      ] 
      } 
     }, 
     { 
      "key": "street", 
      "doc_count": 15, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 11 
       }, 
       { 
       "key": "male", 
       "doc_count": 4 
       } 
      ] 
      } 
     }, 
     { 
      "key": "ave", 
      "doc_count": 14, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 7 
       }, 
       { 
       "key": "male", 
       "doc_count": 7 
       } 
      ] 
      } 
     }, 
     { 
      "key": "box", 
      "doc_count": 11, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 9 
       }, 
       { 
       "key": "male", 
       "doc_count": 2 
       } 
      ] 
      } 
     }, 
     { 
      "key": "fl", 
      "doc_count": 11, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 9 
       }, 
       { 
       "key": "male", 
       "doc_count": 2 
       } 
      ] 
      } 
     }, 
     { 
      "key": "va", 
      "doc_count": 11, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "male", 
       "doc_count": 6 
       }, 
       { 
       "key": "female", 
       "doc_count": 5 
       } 
      ] 
      } 
     }, 
     { 
      "key": "n", 
      "doc_count": 10, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 7 
       }, 
       { 
       "key": "male", 
       "doc_count": 3 
       } 
      ] 
      } 
     }, 
     { 
      "key": "az", 
      "doc_count": 9, 
      "group_by_gender": { 
      "doc_count_error_upper_bound": 0, 
      "sum_other_doc_count": 0, 
      "buckets": [ 
       { 
       "key": "female", 
       "doc_count": 7 
       }, 
       { 
       "key": "male", 
       "doc_count": 2 
       } 
      ] 
      } 
     } 
     ] 
    } 
    } 
}

источник

2017-02-19 Adrian

По умолчанию агрегация terms возвращает только первые 10 терминов, но вы можете получить больше, указав в своей агрегации параметр size, например:

{ 
    "size": 0, 
    "aggs": { 
    "group_by_age": { 
     "terms": { 
     "field": "mailingAddress", 
     "size": 50      <---- add this 
     }, 
     "aggs": { 
     "group_by_gender": { 
      "terms": { 
      "field": "gender" 
      } 
     } 
     } 
    } 
    } 
}

Результат может отличаться, и вам, возможно, придётся увеличить size ещё больше, чтобы действительно увидеть NY.

источник

2017-02-19 05:11:01 Val

Я надеялся, что это так! – Adrian

Агрегация ElasticSearch всеми токенами в поле строки

ответ

Смежные вопросы