Friday, November 07, 2014

Elasticsearch multilevel aggregation

/*
SELECT storm_data_spout.task_id, count(*)
FROM docs
GROUP BY storm_data_spout.task_id
UNION ALL
SELECT storm_data_bolt.task_id, count(*)
FROM docs
GROUP BY storm_data_bolt.task_id
*/

{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "spout": {
      "terms": {
        "field": "storm_data_spout.task_id"
      }
    },
    "bolt": {
      "terms": {
        "field": "storm_data_bolt.task_id"
      }
    }
  }
}

// ======================

/*
SELECT storm_data_spout.task_id, storm_data_bolt.task_id, count(*)
FROM docs
GROUP BY storm_data_spout.task_id, storm_data_bolt.task_id
-- Embedded aggregations are not supported for multilevel grouping with the terms aggregation; using the script workaround per http://bit.ly/1uI76eO
*/

{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "spout-bolt": {
      "terms": {
        "script": "doc['storm_data_spout.task_id'].value + '|' + doc['storm_data_bolt.task_id'].value"
      }
    }
  }
}

No comments: