Category Archives: ElasticSearch Crash Course

ElasticSearch

How to install Elasticsearch and Kibana

https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html

If Elasticsearch fails to start in Docker (for example with a vm.max_map_count error), try the following in PowerShell:

wsl -d docker-desktop
sysctl -w vm.max_map_count=262144

The following notes are based on: https://www.youtube.com/playlist?list=PL_mJOmq4zsHZYAyK606y7wjQtC0aoE6Es

Cluster and node commands

get _cluster/health

get _nodes/stats

Index commands

# create new index
put favorite_candy

# get mapping for index
get shopping/_mapping

# add new doc, id autogenerated
post favorite_candy/_doc
{
  "name":"Adam",
  "age":33
}

# add new doc, id given
put favorite_candy/_doc/6
{
  "name":"John",
  "age":45
}

# get added doc
get favorite_candy/_doc/6

# add new doc, id given, throws exception if id already exists
put favorite_candy/_create/6
{
  "name":"John",
  "age":45
}

# delete all docs matching a query (here: UnitPrice <= 0)
POST shopping/_delete_by_query
{
  "query": {
    "range": {
      "UnitPrice": {
        "lte": 0
      }
    }
  }
}

Leaf queries

# find all in index
get truckplanning/_search 

# find by range
get truckplanning/_search 
{
  "query": {
    "range": {
      "Date": {
        "gte":"12.06.2023",
        "lte":"13.06.2023"
      }
    }
  },
  "track_total_hits": true
}

# find docs containing any of the given words in the headline field
get news/_search
{
  "query": {
    "match": {
      "headline": "Shape of you"
    }
  }
}

# find exact phrase in headline field
get news/_search
{
  "query": {
    "match_phrase": {
      "headline": "Shape of you"
    }
  }
}

# multimatch - like match query, but in any of specified fields
# in this case find doc where Obama is an author or is mentioned in headline
get news/_search
{
  "query": {
    "multi_match": {
      "query": "Obama", 
      "fields": ["headline", "authors"]
    }
  }
}

# multimatch with field boosting
# same search as above, but a match in the authors field is boosted (^3), so it gets a higher score
get news/_search
{
  "query": {
    "multi_match": {
      "query": "Obama", 
      "fields": ["headline", "authors^3"]
    }
  }
}

# multimatch for match_phrase (not just match, like before)
get news/_search
{
  "query": {
    "multi_match": {
      "query": "Barack Obama", 
      "fields": ["headline", "authors^3"], 
      "type": "phrase"
    }
  }
}

Compound queries

# combined (bool) query - you can mix many different conditions
# filter - doc must match the condition; decides whether doc is in or out of the results, does not affect the score
# must - doc must match the condition; the match also contributes to the score
# must_not - doc must not match the condition; matching docs are excluded from the results
# should - optional condition; docs which match it rank higher

# find news with Michelle Obama in headline and politics category
get news/_search
{
  "query": {
    "bool": {
      "must": [
          {
          "match": {
            "category": "POLITICS"
          }
          }, 
          {
              "match_phrase": {
              "headline": "Michelle Obama"
            }
          }
        ]      
    }
  }
}

# find news with Michelle Obama in headline but not in weddings category
get news/_search
{
  "query": {
    "bool": {
      "must": [
          {
              "match_phrase": {
              "headline": "Michelle Obama"
            }
          }
        ], 
        "must_not": [
          {
            "match": {
              "category": "WEDDINGS"
            }
          }
        ]
    }
  }
}

# find docs authored by Obama, but filter out everything outside 2015
get news/_search
{
  "query": {
    "bool": {
      "must": [
          {
              "match_phrase": {
              "authors": "Obama"
            }
          }
        ], 
      "filter": [
        {
          "range": {
            "date": {
              "gte": "2015-01-01",
              "lte": "2015-12-31"
            }
          }
        }
      ]
    }
  }
}

Metric aggregations

# group by category name
get news/_search
{
  "aggregations": {
    "by_category": {
      "terms": {
        "field": "category", 
        "size": 100 // how many categories to show
      }
    }
  }, 
  "track_total_hits": true
}

# sum and do not return top 10 docs, only aggregation value 
# you can calculate also: min, max, avg
get shopping/_search
{
  "size": 0,
  "aggs": {
    "total-qty": {
      "sum": {
        "field": "Quantity"
      }
    }
  }
}

# calculate all basic aggregations at once for field
get shopping/_search
{
  "aggs": {
    "unit-price-stats": {
      "stats": {
        "field": "UnitPrice"
      }
    }
  }
}

# unique count
get shopping/_search
{
  "aggs": {
    "uniqe-customers": {
      "cardinality": {
        "field": "CustomerID"
      }
    }
  }
}

# aggregation with query
# calculates average price in Germany
get shopping/_search
{
  "query": {
    "match": {
      "Country": "Germany"
    }
  },
  "aggs": {
    "avg-price-germany": {
      "avg": {
        "field": "UnitPrice"
      }
    }
  }, 
    "track_total_hits": true
}

Bucket aggregation

#date histogram - grouping by dates
#fixed interval - each time group is the same size (30 minutes, 8 hours, ...)
get shopping/_search
{
  "aggs": {
    "shopping-per-shift": {
      "date_histogram": {
        "field": "InvoiceDate", 
        "fixed_interval": "8h"
      }
    }
  }, 
    "track_total_hits": true
}

#date histogram - grouping by dates
#calendar interval - use calendar unit (1d, 1w, 1M, 1q, 1y)
get shopping/_search
{
  "aggs": {
    "shopping-per-day": {
      "date_histogram": {
        "field": "InvoiceDate",
        "calendar_interval": "1M", 
        "order": { //sort groups
          "_key": "desc"
          //"_count": "desc"
        }
      }
    }
  }, 
    "track_total_hits": true
}

#histogram by metric field - grouping by any metric field
# group transactions by unit prices
get shopping/_search
{
  "size": 0,
  "aggs": {
    "shopping-per-price": {
      "histogram": {
        "field": "UnitPrice",
        "interval": "1000", 
        "order": {
          "_key": "desc"
        }
      }
    }
  }, 
    "track_total_hits": true
}

#range aggregations - group by custom ranges
get shopping/_search
{
  "size": 0,
  "aggs": {
    "shopping-per-price": {
      "range": {
        "field": "UnitPrice",
        "ranges": [
          {
            "to": 50
          },
          {
            "from": 50,
            "to": 500 
          }, 
          {
            "from": 500,
            "to": 1000 
          },        
          {
            "from": 1000
          }
        ]
      }
    }
  }, 
    "track_total_hits": true
}

# terms aggregations - group by term (keyword field)
# find top 3 shopping countries
get shopping/_search
{
  "size": 0,
  "aggs": {
    "top-shopping-countries": {
      "terms": {
        "field": "Country",
        "order": {
          "_count": "desc"
        }, 
        "size": 3
      }
    }
  }, 
    "track_total_hits": true
}

Combined aggregations

# buckets and metric aggregation with script
# first aggregate by date then sum in each date range
# sum value returned by script
get shopping/_search
{
  "aggs": {
    "shopping-per-month": {
      "date_histogram": {
        "field": "InvoiceDate",
        "calendar_interval": "1M", 
        "order": {
          "_key": "asc"
        }
      }, 
      "aggs": {
        "revenue-per-month": {
          "sum": {
            "script": {
              "source": "doc['UnitPrice'].value * doc['Quantity'].value"
            }
          }
        }
      }
    }
  }, 
    "track_total_hits": true
}

#multiple subaggregations with sorting
#revenue per month and unique customers per month
#max revenue on top
get shopping/_search
{
  "aggs": {
    "shopping-per-day": {
      "date_histogram": {
        "field": "InvoiceDate",
        "calendar_interval": "1M", 
        "order": {
          "revenue-per-month": "desc"
        }
      }, 
      "aggs": {
        "revenue-per-month": {
          "sum": {
            "script": {
              "source": "doc['UnitPrice'].value * doc['Quantity'].value"
            }
          }
        }, 
        "uq-customers-per-month": {
          "cardinality": {
            "field": "CustomerID"
          }
        }
      }
    }
  }, 
    "track_total_hits": true
}

Mapping

Elasticsearch creates the mapping dynamically if you don’t define a custom mapping yourself.

You can create a mapping before inserting any data. After that, if you want to change the mapping of an existing field, you need to create a new index, define the new mapping on it, and reindex the old index into it.

Field types:

  • text – string field, used for full-text search. Such a field passes through an analyzer, which splits the text into tokens, lowercases them, and removes punctuation marks.
  • keyword – string field, used for exact search, aggregations, sorting. Original values are stored, not analyzed.
# display mapping
get shopping/_mapping

#create mapping for index
PUT shopping2
{
  "mappings": {
    // your mappings
  }
}

#reindex after mapping change
POST _reindex
{
 "source" : { "index": "shopping1"}, 
 "dest": {"index": "shopping2"}
}

#mapping for runtime field (like calculated column in SQL)
PUT shopping2/_mapping
{
  "runtime": {
    "total": {
      "type": "double",
      "script": {
        "source": "emit(doc['unit_price'].value* doc['quantity'].value)"
      }
    }
  }
}