SERDE is a combination of Serializer and Deserializer i.e SERIALIZER + DESERIALIZER = SERDE. SERDE is popularly used to load from sources storing data in JSON format.
CREATE TABLE order_json ( order_id INT, order_date STRING, cust_id STRING, order_status STRING ) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE;
LOAD DATA local inpath '/Users/username/hive/data_files/order_json.txt' OVERWRITE INTO TABLE order_json;
Failed with exception java.io.IOException:org.apache.hadoop.hive.serde2.SerDeException: org.codehaus.jackson.JsonParseException: Current token (VALUE_STRING) not numeric, can not use numeric value accessorsFor example: Note the difference between below 2 JSONs messages(marked in BOLD)
{"order_id":68784,"order_date":"2014-01-14 00:00:00.0","cust_id":"10349","order_status":"COMPLETE"}
{"order_id":68784,"order_date":"2014-01-14 00:00:00.0","cust_id":10349,"order_status":"COMPLETE"}
CREATE TABLE order_json_number ( order_id INT, order_date STRING, cust_id STRING, order_status STRING ) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE;
LOAD DATA local inpath '/Users/username/hive/data_files/order_json_number.txt' OVERWRITE INTO TABLE order_json_number; --Sample data in JSON file {"order_id":68834,"order_date":"2014-05-01 00:00:00.0","cust_id":6938,"order_status":"COMPLETE"} {"order_id":68835,"order_date":"2014-05-02 00:00:00.0","cust_id":764,"order_status":"COMPLETE"} {"order_id":68836,"order_date":"2014-05-03 00:00:00.0","cust_id":8009,"order_status":"PENDING_PAYMENT"} {"order_id":68837,"order_date":"2014-05-07 00:00:00.0","cust_id":1223,"order_status":"COMPLETE"}