1

For Elasticsearch nested objects, when the nested objects grow large (i.e., thousands to millions of records), the resulting JSON string becomes very large. Is it possible to break down or batch the nested objects in a bulk insert? Is there another way to accomplish this?

Here is the sample code. Note: the vertical dots represent a range in the sequence of records (e.g., 1, 2, 3, …, 10).

 POST http://localhost:9200/_bulk
 { "index": { "_index": "adventureworks2012","_type": "salesterritory","_id": "1"}}
 {
  "TerritoryID": "1",
  "Name": "Northwest",
  "CountryRegionCode": "US",
  "Group": "North America",
  "SalesYTD": "7887186.7882",
  "SalesLastYear": "3298694.4938",
  "CostYTD": "0.0000",
  "CostLastYear": "0.0000",
  "rowguid": "43689a10-e30b-497f-b0de-11de20267ff7",
  "ModifiedDate": "2002-06-01T00:00:00Z",
   "StateProvince": [
   {
      "StateProvinceID": "23",
      "StateProvinceCode": "ID ",
      "CountryRegionCode": "US",
      "IsOnlyStateProvinceFlag": "False",
      "Name": "Idaho",
      "TerritoryID": "1",
      "rowguid": "628e983a-33c7-4cb4-867f-274ef12b3597",
      "ModifiedDate": "2008-03-11T10:17:21Z",
      "Address": [
        {
          "AddressID": "1",
          "AddressLine1": "2472 Alexander Place",
          "AddressLine2": null,
          "City": "West Covina",
          "StateProvinceID": "23",
          "PostalCode": "83301",
          "SpatialLocation": "POINT (-114.526954354111 42.3275599016735)",
          "rowguid": "482a0800-62f7-4f19-966d-db43140ca308",
          "ModifiedDate": "2006-01-23T00:00:00Z"
        },
        {
          "AddressID": "2",
          "AddressLine1": "8127 Otter Dr.",
          "AddressLine2": null,
          "City": "Boise",
          "StateProvinceID": "23",
          "PostalCode": "83702",
          "SpatialLocation": "POINT (-116.15343204405 43.6894012128085)",
          "rowguid": "a0f2c6be-9b9a-4a02-aab9-e379b37bf2f2",
          "ModifiedDate": "2006-02-25T00:00:00Z"
        },
                        .
                        .
                        .
         {
          "AddressID": "1000",
          "AddressLine1": "22571 South 2500 East",
          "AddressLine2": null,
          "City": "Idaho Falls",
          "StateProvinceID": "23",
          "PostalCode": "83402",
          "SpatialLocation": "POINT (-112.381319181768 43.4931682346105)",
          "rowguid": "1e5743c8-b8f7-4a65-b812-0302ffcb9d0e",
          "ModifiedDate": "2007-07-01T00:00:00Z"
        }

      ]   

    },
            .
            .
            .
    {
      "StateProvinceID": "200",
      "StateProvinceCode": "MT ",
      "CountryRegionCode": "US",
      "IsOnlyStateProvinceFlag": "False",
      "Name": "Montana",
      "TerritoryID": "1",
      "rowguid": "d4ff6e1a-a8e8-4379-a43c-746dbb0d6d13",
      "ModifiedDate": "2008-03-11T10:17:21Z",
       "Address": [
        {
          "AddressID": "1001",
          "AddressLine1": "9830 May Way",
          "AddressLine2": null,
          "City": "Mill Valley",
          "StateProvinceID": "200",
          "PostalCode": "59715",
          "SpatialLocation": "POINT (-110.930516159335 45.7151174605975)",
          "rowguid": "26f279a4-0f7c-446d-8488-8f14b8bcd1b2",
          "ModifiedDate": "2006-01-23T00:00:00Z"
        },
        {
          "AddressID": "1002",
          "AddressLine1": "6061 St. Paul Way",
          "AddressLine2": null,
          "City": "Everett",
          "StateProvinceID": "200",
          "PostalCode": "98201",
          "SpatialLocation": "POINT (-122.164986022411 48.0138374930781)",
          "rowguid": "297d0f3e-701d-44e1-b524-df07a9fc4470",
          "ModifiedDate": "2006-01-23T00:00:00Z"
        },
                        .
                        .
                        .
        {
          "AddressID": "1200",
          "AddressLine1": "2313 B Southampton Rd",
          "AddressLine2": null,
          "City": "Missoula",
          "StateProvinceID": "200",
          "PostalCode": "59801",
          "SpatialLocation": "POINT (-113.976004412717 46.7906154220032)",
          "rowguid": "04cb8657-ab4a-4ae3-966a-e69d0eee743e",
          "ModifiedDate": "2006-01-24T00:00:00Z"
        }

       ]
     }
 ] 


}

                                         .
                                         .
                                         .
{"index": {"_index": "adventureworks2012", "_type": "salesterritory", "_id": "200"}}
{
  "TerritoryID": "2",
  "Name": "Northeast",
  "CountryRegionCode": "US",
  "Group": "North America",
  "SalesYTD": "2402176.8476",
  "SalesLastYear": "3607148.9371",
  "CostYTD": "0.0000",
  "CostLastYear": "0.0000",
  "rowguid": "00fb7309-96cc-49e2-8363-0a1ba72486f2",
  "ModifiedDate": "2002-06-01T00:00:00Z",
   "StateProvince": [
     {
      "StateProvinceID": "201",
      "StateProvinceCode": "CT ",
      "CountryRegionCode": "US",
      "IsOnlyStateProvinceFlag": "False",
      "Name": "Connecticut",
      "TerritoryID": "2",
      "rowguid": "1e7bb47a-e16b-4968-86fa-45af0211fa84",
      "ModifiedDate": "2008-03-11T10:17:21Z",
      "Address": [
         {
          "AddressID": "1201",
          "AddressLine1": "225 Hyde Rd",
          "AddressLine2": null,
          "City": "Farmington",
          "StateProvinceID": "201",
          "PostalCode": "06032",
          "SpatialLocation": "POINT (-72.9341433097597 41.7540856650113)",
          "rowguid": "73e14c8f-86ab-420c-baaa-de965946f79e",
          "ModifiedDate": "2007-08-01T00:00:00Z"
        },
                            .
                            .
                            .
        {
          "AddressID": "1500",
          "AddressLine1": "9963 Tresser Blvd.",
          "AddressLine2": null,
          "City": "Stamford",
          "StateProvinceID": "201",
          "PostalCode": "06901",
          "SpatialLocation": "POINT (-73.4402758570587 41.022811357022)",
          "rowguid": "9c27c691-ece2-4a66-bc73-abb7ec5400da",
          "ModifiedDate": "2006-07-01T00:00:00Z"
        }
       ]
     },
                            .
                            .
                            .
     {
      "StateProvinceID": "500",
      "StateProvinceCode": "IN ",
      "CountryRegionCode": "US",
      "IsOnlyStateProvinceFlag": "False",
      "Name": "Indiana",
      "TerritoryID": "2",
      "rowguid": "91f21ef0-c528-4310-bb29-6ba45ae75a17",
      "ModifiedDate": "2008-03-11T10:17:21Z",
        "Address": [
         {
          "AddressID": "1501",
          "AddressLine1": "42522 Northrupp",
          "AddressLine2": null,
          "City": "Fort Wayne",
          "StateProvinceID": "500",
          "PostalCode": "46807",
          "SpatialLocation": "POINT (-85.270449284881 41.0024170357565)",
          "rowguid": "851c4fae-9d9b-4fc5-8c02-3f454ac4ed9a",
          "ModifiedDate": "2005-07-01T00:00:00Z"
        },
                            .
                            .
                            .
        {
          "AddressID": "2500",
          "AddressLine1": "89950 Castleton Corner",
          "AddressLine2": null,
          "City": "Indianapolis",
          "StateProvinceID": "500",
          "PostalCode": "46204",
          "SpatialLocation": "POINT (-86.0919828741368 39.8918105694471)",
          "rowguid": "a172a445-427f-4e6c-a2b4-1368e9d41dc3",
          "ModifiedDate": "2006-07-01T00:00:00Z"
         }
        ]

     }


   ]


}
Red Mask
  • 11
  • 2
  • 1
    If you have that much nesting going on in one document, I would suggest ripping them out into different types. – ryanlutgen Nov 22 '16 at 03:15
  • And make sure to have each document on a single line. I think you pretty printed them for illustration purposes, but the bulk API won't accept those docs as printed above. Aside from that, I think it would clearly make plain sense to break them out into one document per address. – Val Nov 22 '16 at 04:30
  • @ryanlutgen but I need a relationship between sales territory, state province and address and parent/child won't work for me because there is possibility that the child will have multiple parent. Is there a better approach? – Red Mask Nov 23 '16 at 21:21
  • @Val Yeah.. that's for illustration purposes only in order to understand my question. – Red Mask Nov 23 '16 at 21:24
  • This answer might help and shows how to model your data to keep the relationships in your data: http://stackoverflow.com/questions/36915428/how-to-setup-elasticsearch-index-structure-with-multiple-entity-bindings/36982705#36982705 (hint: denormalization) – Val Nov 24 '16 at 04:23

0 Answers