How do I iterate over indexed data in script filter query using elasticsearch?

I have following indexed data

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 7992,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "test_index",
        "_id": "33952",
        "default_fee": 12,
        "custom_dates": [
          {
            "date": "2023-11-01",
            "price": 100
          },
          {
            "date": "2023-11-02",
            "price": 50
          }
        ],
        "options": [
          {
            "id": 95,
            "cost": 5,
            "type": [
              "Car"
            ]
          }
        ]
      }
    ]
  }
}

        

I have added a script field as total to calculate total on runtime as follow

{
  script_fields: {
    total: {
      script: {
        source: "
          DateTimeFormatter formatter = DateTimeFormatter.ofPattern('yyyy-MM-dd');
          def from = LocalDate.parse(params.checkin, formatter);
          def to = LocalDate.parse(params.checkout, formatter);
          def stay = params.total_stay;

          def custom_price_dates = [];
          if (params['_source']['custom_dates'] != null && !params['_source']['custom_dates'].isEmpty()) {
            custom_price_dates = params['_source']['custom_dates'].stream()
            .filter(filter_doc -> {
              def date = LocalDate.parse(filter_doc.start_date, formatter);
              return !date.isBefore(from) && !date.isAfter(to.minusDays(1));
            })
            .collect(Collectors.toList());
          }

          def custom_price = custom_price_dates.stream().mapToDouble(custom_doc -> custom_doc.price).sum();
          def default_price = stay == custom_price_dates.size() ? 0 : (stay - custom_price_dates.size()) * params['_source']['default_fee'];
          def calc_price = default_price + custom_price;
          return calc_price; 
        ",
        params: {
          checkin: Date.current.to_s,
          checkout: Date.current.to_s,
          total_stay: 2
        }
      }
    }
  },
  _source: ["*"]
}

This will return total on scripted field. Now I want to filter based on range on above total. How do I achive it? I tried using script query as following but it does not loop through custom_dates as it is nested type.

Also, I cannot index total before as checkin and checkout dates are dynamic and there might be custom price on given checkin and checkout dates. Please suggest.

This can be done, but it is complicated. First of all, we need to understand that this search is executed in two phases – query and fetch. During the query phase each shards collects top 10 hits with their sort keys (_score by default), during the fetch phase the coordinating node collects these ids and sort keys from all shards, selects top 10 from them and then asks each shard to return there documents. The script fields are calculated during the fetch phase and therefore they are not accessible to filters.

To make matter worse you indexed custom dates as nested objects. Internally, nested objects are indexed as separate objects and the only way to pass the information from them to the main query is through the _score. So, basically, to achieve what you are trying to achieve with nested objects you need to encode price into the _score. To simplify the calculations we would need to store the price difference instead of actual price in the nested object. So if default price is 12 and special price is 100, we need to store 88.

Then we can find all nested objects that match our date range:

           {
              "nested": {
                "path": "custom_dates",
                "query": {
                  "range": {
                    "custom_dates.start_date": {
                      "gte": "2023-10-31",
                      "lte": "2023-11-02"
                    }
                  }
                }
              }
            }

Then we can wrap this into script score which will replace score with price:

            {
              "nested": {
                "path": "custom_dates",
                "query": {
                  "script_score": {
                    "script": {
                      "source": "doc['custom_dates.price_adjustment'].value"
                    },
                    "query": {
                      "range": {
                        "custom_dates.start_date": {
                          "gte": "2023-10-31",
                          "lte": "2023-11-02"
                        }
                      }
                    }
                  }
                },
                "score_mode": "sum"
              }
            }

Then we can use another script_score to calculate default price:

            {
              "script_score": {
                "script": {
                  "params": {
                    "total_stay": 3
                  },
                  "source": "doc['default_fee'].value * params.total_stay"
                },
                "query": {
                  "match_all": {}
                }
              }
            }

And then we can combine them together into two should clauses that add scores.

So, now we have _score equal to price assigned to each record. The last step is to filter records by the _score, which can be done by another script_score with min_score parameter:

    "script_score": {
      "query": {
        "bool": {
          "should": [
            {
              .... default price calculation ....
            },
            {
              .... adjusted price calculation ....
            }
          ]
        }
      },
      "script": {
        "source": "if (_score >= params.min_price && _score <=params.max_price) { 1 } else { 0 }",
        "params": {
          "min_price": 100,
          "max_price": 200
        }
      },
      "min_score": 1
    }

If we put this all together we will get something like this:

DELETE test
PUT test
{
  "mappings": {
    "properties": {
      "default_fee": {
        "type": "double"
      },
      "custom_dates": {
        "type": "nested",
        "properties": {
          "start_date": {
            "type": "date"
          },
          "price_adjustment": {
            "type": "double"
          }
        }
      }
    } 
  }
}

PUT test/_doc/33952?refresh
{
  "default_fee": 12,
  "custom_dates": [
    {
      "start_date": "2023-11-01",
      "price_adjustment": 88
    },
    {
      "start_date": "2023-11-02",
      "price_adjustment": 38
    }
  ],
  "options": [
    {
      "id": 95,
      "cost": 5,
      "type": [
        "Car"
      ]
    }
  ]
}

PUT test/_doc/33953?refresh
{
  "default_fee": 24,
  "custom_dates": [
    {
      "start_date": "2023-11-01",
      "price_adjustment": 12
    },
    {
      "start_date": "2023-11-02",
      "price_adjustment": 1
    }
  ],
  "options": [
    {
      "id": 95,
      "cost": 5,
      "type": [
        "Truck"
      ]
    }
  ]
}

POST test/_search
{
  "query": {
    "script_score": {
      "query": {
        "bool": {
          "should": [
            {
              "script_score": {
                "script": {
                  "params": {
                    "total_stay": 3
                  },
                  "source": "doc['default_fee'].value * params.total_stay"
                },
                "query": {
                  "match_all": {}
                }
              }
            },
            {
              "nested": {
                "path": "custom_dates",
                "query": {
                  "script_score": {
                    "script": {
                      "source": "doc['custom_dates.price_adjustment'].value"
                    },
                    "query": {
                      "range": {
                        "custom_dates.start_date": {
                          "gte": "2023-10-31",
                          "lte": "2023-11-02"
                        }
                      }
                    }
                  }
                },
                "score_mode": "sum"
              }
            }
          ]
        }
      },
      "script": {
        "source": "if (_score >= params.min_price && _score <=params.max_price) { 1 } else { 0 }",
        "params": {
          "min_price": 100,
          "max_price": 200
        }
      },
      "min_score": 1
    }
  }
}

Does this work? Yes, to a certain degree. In elasticsarch scores are non-negative 32-bit floating point numbers. So, there is not much precision there, and if your adjustments are negative, it will complicate things even more.

Would I do something like this in production. I would not. What I would do is to store the special dates in the main document in some easily parseable format in such a way that I can access it during query phase. And then parse it from the main document in script query and in a script_field. Yes, you will need to parse it twice, but as I mention at the beginning of my answer there is not much we can do about that since these operations are performed at different stages. The simplest way to do that is to store it as
a multi-valued keyword field. Basically, you can do something like this:

DELETE test
PUT test
{
  "mappings": {
    "properties": {
      "default_fee": {
        "type": "double"
      },
      "custom_dates": {
        "type": "keyword"
      }
    } 
  }
}

PUT test/_doc/33952?refresh
{
  "default_fee": 12,
  "custom_dates": ["2023-11-01:100", "2023-11-02:150"],
  "options": [
    {
      "id": 95,
      "cost": 5,
      "type": [
        "Car"
      ]
    }
  ]
}

PUT test/_doc/33953?refresh
{
  "default_fee": 24,
  "custom_dates": ["2023-11-01:12", "2023-11-02:1"],
  "options": [
    {
      "id": 95,
      "cost": 5,
      "type": [
        "Truck"
      ]
    }
  ]
}


POST test/_search
{
  "query": {
    "script": {
      "script": {
        "source": """
          DateTimeFormatter formatter = DateTimeFormatter.ofPattern('yyyy-MM-dd');
          def from = LocalDate.parse(params.checkin, formatter);
          def to = LocalDate.parse(params.checkout, formatter);
          def stay = java.time.temporal.ChronoUnit.DAYS.between(from, to);

          def custom_prices = [10];
          if (doc.containsKey('custom_dates')) {
            custom_prices = doc['custom_dates'].stream()
            .map(date_price -> {
              def date_price_parsed = date_price.splitOnToken(':');
              def date = LocalDate.parse(date_price_parsed[0], formatter);
              if (!date.isBefore(from) && !date.isAfter(to.minusDays(1))) {
                return Double.parseDouble(date_price_parsed[1]);
              } else {
                return -1;
              }
            })
            .filter(price -> {return price > 0;})
            .collect(Collectors.toList());
          }
          def custom_price = custom_prices.sum();
          def default_price = stay == custom_prices.size() ? 0 : (stay - custom_prices.size()) * doc['default_fee'].value;
          def calc_price = default_price + custom_price;
          return calc_price >= params.min_price && calc_price <= params.max_price; 

        """,
        "params": {
          "checkin": "2023-10-31",
          "checkout": "2023-11-02",
          "min_price": 100,
          "max_price": 200
        }
      }
    }
  },
  "script_fields": {
    "total": {
      "script": {
        "source": """
          DateTimeFormatter formatter = DateTimeFormatter.ofPattern('yyyy-MM-dd');
          def from = LocalDate.parse(params.checkin, formatter);
          def to = LocalDate.parse(params.checkout, formatter);
          def stay = java.time.temporal.ChronoUnit.DAYS.between(from, to);
          
          def custom_prices = [10];
          if (doc.containsKey('custom_dates')) {
            custom_prices = doc['custom_dates'].stream()
            .map(date_price -> {
              def date_price_parsed = date_price.splitOnToken(':');
              def date = LocalDate.parse(date_price_parsed[0], formatter);
              if (!date.isBefore(from) && !date.isAfter(to.minusDays(1))) {
                return Double.parseDouble(date_price_parsed[1]);
              } else {
                return -1;
              }
            })
            .filter(price -> {return price > 0;})
            .collect(Collectors.toList());
          }
          def custom_price = custom_prices.sum();
          def default_price = stay == custom_prices.size() ? 0 : (stay - custom_prices.size()) * doc['default_fee'].value;
          def calc_price = default_price + custom_price;
          return calc_price; 

        """,
        "params": {
          "checkin": "2023-10-31",
          "checkout": "2023-11-02"
        }
      }
    }
  },
  "_source": [
    "*"
  ]
}

Leave a Comment