16

I have about 6,000 documents to update in Elasticsearch, and I have to use PHP. I searched the documentation and found Bulk Indexing, but it does not keep the previous data.

I have structure:

[
  {
    'name': 'Jonatahn',
    'age' : 21
  }
]

My code snippet to update:

// A bulk body is a flat list of pairs: the action metadata first, then the
// source that belongs to that action.
$action = [
    'index' => [
        '_index' => 'customer',
        '_type' => 'doc',
        '_id' => '09310451939',
    ],
];

// Only the name is sent as the source for this action.
$source = [
    'name' => 'Jonathan',
];

$params = [
    'index' => 'customer',
    'type' => 'doc',
    'body' => [$action, $source],
];

$client->bulk($params);

When I send ['name' => 'Jonathan'], I expect the name to be updated and the age to be kept, but the age gets deleted. Sure, I can still update the documents one by one, but that will take a long time. Is there a better way to do this?

Muhammad Waqas Dilawar
  • 1,844
  • 1
  • 23
  • 34
Jonathan Machado
  • 522
  • 4
  • 14

4 Answers4

12

My mistake was using the "index" action; the correct action for what I want is "update".

The final code is:

// Action metadata: "update" is used here instead of "index".
$action = [
    'update' => [
        '_index' => 'customer',
        '_type' => 'doc',
        '_id' => '09310451939',
    ],
];

// For an "update" action the fields to change are wrapped in "doc".
$partial = [
    'doc' => [
        'name' => 'Jonathan',
    ],
];

$params = [
    'index' => 'customer',
    'type' => 'doc',
    'body' => [$action, $partial],
];

$client->bulk($params);

With the code above, the document keeps its previous data, and only the fields I pass in the params are updated.

Response:

Array
(
    [took] => 7
    [timed_out] =>
    [_shards] => Array
        (
            [total] => 5
            [successful] => 5
            [skipped] => 0
            [failed] => 0
        )

    [hits] => Array
        (
            [total] => 1
            [max_score] => 1
            [hits] => Array
                (
                    [0] => Array
                        (
                            [_index] => customer
                            [_type] => doc
                            [_id] => 09310451939
                            [_score] => 1
                            [_source] => Array
                                (
                                    [name] => Jonathan
                                    [age] => 23
                                )

                        )

                )

        )

)
Abdul Rafay
  • 787
  • 6
  • 21
Jonathan Machado
  • 522
  • 4
  • 14
4

As per docs, Bulk API possible actions are index, create, delete and update. update expects that the partial doc, upsert and script and its options are specified on the next line.

POST _bulk
{ "update" : {"_id" : "1", "_type" : "_doc", "_index" : "test"} }
{ "doc" : {"field2" : "value2"} }
panchicore
  • 11,451
  • 12
  • 74
  • 100
  • how can i do this using PHP ? i used above answer's code and not all documents are being updated, any clue ? thanks, here is my issue https://github.com/elastic/elasticsearch-php/issues/785 –  Jul 18 '18 at 14:30
3

Here is my final code.

<?php

require_once('../elasticsearch.php');

///////////////////////////////////////////////////
// Update seeders and leechers in Elasticsearch.
//
// Every row of `content` flagged is_updated = '1' is sent as a partial
// ("update" + "doc") bulk action carrying only the seeders/leechers fields.
// $conn, $elasticsearch, $elastcsearch_index and $elastcsearch_type are not
// defined in this script; presumably ../elasticsearch.php provides them.

// Number of documents sent per bulk request.
$batch_size = 1000;

// Sends one bulk request and logs every item Elasticsearch reports as failed.
// A bulk call can succeed as a whole while individual items fail, so the
// "errors" flag of the response has to be inspected explicitly.
$send_bulk = function ($params) use ($elasticsearch) {
    $responses = $elasticsearch->bulk($params);

    if (!empty($responses['errors'])) {
        foreach ($responses['items'] as $item) {
            // Each item is keyed by its action name ("update" here).
            foreach ($item as $action => $result) {
                if (isset($result['error'])) {
                    error_log(
                        'Bulk ' . $action . ' failed for _id ' . $result['_id'] . ': '
                        . json_encode($result['error'])
                    );
                }
            }
        }
    }

    return $responses;
};

//get updated records
$get_updated_records = mysqli_query($conn, "SELECT content_id, seeders, leechers FROM content WHERE is_updated = '1' order by seeders DESC");

//create blank array
$results = array();

if ($get_updated_records === false) {
    // Query failed: report it and leave $results empty so nothing is sent.
    error_log('Could not fetch updated records: ' . mysqli_error($conn));
} else {
    while ($row = mysqli_fetch_assoc($get_updated_records)) {
        //put all results in array
        $results[] = $row;
    }
}

//from https://www.elastic.co/guide/en/elasticsearch/client/php-api/current/_indexing_documents.html

$params = ['body' => []];

// Documents currently queued in $params; each one adds two body entries.
$queued = 0;

foreach ($results as $row) {
    $params['body'][] = [
        'update' => [
            '_index' => $elastcsearch_index,
            '_type' => $elastcsearch_type,
            '_id' => $row['content_id'],
        ],
    ];

    $params['body'][] = [
        'doc' => [
            'seeders' => intval($row['seeders']),
            'leechers' => intval($row['leechers']),
        ],
    ];

    $queued++;

    // Send once a full batch is queued. Counting queued documents (instead of
    // testing a zero-based index) avoids a one-document request on the very
    // first iteration.
    if ($queued === $batch_size) {
        $responses = $send_bulk($params);

        // erase the old bulk request
        $params = ['body' => []];
        $queued = 0;

        // unset the bulk response when you are done to save memory
        unset($responses);
    }
}

// Send the last batch if it exists
if (!empty($params['body'])) {
    $responses = $send_bulk($params);
}
0

$batch_elastics is the array of result rows. For each row I unset these two values, because I don't need them in the document that is inserted or updated:

// 'type' and 'diamonds_id' are removed from the row before the row is used
// as the document body of the bulk request.
unset($batch_row['type']);

unset($batch_row['diamonds_id']);

code start from here...

    if(count($batch_elastics)){
        // Upper bound on the number of documents queued per bulk request.
        $batch_size = 1000;

        // Sends one bulk request and logs every item reported as failed.
        // A bulk call can succeed as a whole while individual items fail, so
        // the "errors" flag of the response has to be inspected explicitly.
        $send_bulk = function ($params) use ($client) {
            $responses = $client->bulk($params);

            if (!empty($responses['errors'])) {
                foreach ($responses['items'] as $item) {
                    // Each item is keyed by its action name.
                    foreach ($item as $action => $result) {
                        if (isset($result['error'])) {
                            error_log(
                                'Bulk ' . $action . ' failed for _id ' . $result['_id'] . ': '
                                . json_encode($result['error'])
                            );
                        }
                    }
                }
            }

            return $responses;
        };

        // Two passes over the rows: every "create" row is sent before any
        // "update" row, which keeps the ordering of the two separate loops
        // this block replaces.
        foreach (['create', 'update'] as $action) {
            $params = ['body' => []];

            // Documents queued in $params for the current action. Counting
            // only queued documents (not every row seen) keeps each request
            // at $batch_size documents at most.
            $queued = 0;

            foreach ($batch_elastics as $batch_row) {
                $type = $batch_row['type'];
                $id = $batch_row['diamonds_id'];

                if ($type !== $action) {
                    continue;
                }

                // Bookkeeping fields; they must not end up in the document.
                unset($batch_row['type'], $batch_row['diamonds_id']);

                $params['body'][] = [
                    $action => [
                        '_index' => 'diamonds',
                        '_id' => $id,
                    ],
                ];

                // "create" takes the document itself as source, while
                // "update" takes the changed fields wrapped in "doc".
                $params['body'][] = ($action === 'create')
                    ? $batch_row
                    : ['doc' => $batch_row];

                $queued++;

                if ($queued === $batch_size) {
                    $responses = $send_bulk($params);
                    $params = ['body' => []];
                    $queued = 0;
                    unset($responses);
                }
            }

            // Send the last batch if it exists
            if (!empty($params['body'])) {
                $responses = $send_bulk($params);
            }
        }
    }
Peter Csala
  • 17,736
  • 16
  • 35
  • 75