sqoop - kite sdk to demonstrate copying of various file formats to hadoop - apache sqoop - sqoop tutorial - sqoop hadoop
Using the Kite SDK to demonstrate copying various file formats to Hadoop
# Download the MovieLens "ml-latest-small" archive, unpack it, and change into
# the ml-latest-small/ directory. The steps are chained with && so that a
# failed download does not lead to unzip/cd running against a bad file.
#   -f  make curl exit non-zero on an HTTP error instead of saving the
#       server's error page as movies.zip
#   -L  follow redirects, in case the server redirects this URL
curl -fL http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -o movies.zip &&
  unzip movies.zip &&
  cd ml-latest-small/
Click "Copy code" button to copy into clipboard - By wikitechy - sqoop tutorial - team
# Build the `ratings` dataset from ratings.csv. No --format option is passed
# to `create`, so whatever storage format Kite uses by default applies.
# (Comments use '#': a line starting with '--' would be run by the shell as a
# command named '--'.)

# infer the schema: generate a schema from the CSV file and write it to ratings.avsc
kite-dataset csv-schema ratings.csv --record-name ratings -o ratings.avsc
# show the generated schema so it can be checked before creating the dataset
cat ratings.avsc
# create the schema: create the `ratings` dataset using that schema file
kite-dataset create ratings --schema ratings.avsc
# load the data
kite-dataset csv-import ratings.csv --delimiter ',' ratings
Click "Copy code" button to copy into clipboard - By wikitechy - sqoop tutorial - team
# Build a second dataset, `ratingsp`, from the same ratings.csv, this time
# asking Kite to store it as Parquet (--format parquet on `create`).
# (Comments use '#': a line starting with '--' would be run by the shell as a
# command named '--'.)

# infer the schema: generate a schema from the CSV file and write it to ratingsp.avsc
kite-dataset csv-schema ratings.csv --record-name ratingsp -o ratingsp.avsc
# show the generated schema so it can be checked before creating the dataset
cat ratingsp.avsc
# create the schema: create the `ratingsp` dataset with Parquet as its format
kite-dataset create ratingsp --schema ratingsp.avsc --format parquet
# load the data: import the comma-delimited CSV rows into `ratingsp`
kite-dataset csv-import ratings.csv --delimiter ',' ratingsp
# Start the Hive CLI; the two statements that follow are entered at its prompt.
# Each one computes the average of the `rating` column, first from `ratings`
# and then from `ratingsp`.
hive
SELECT AVG(rating) FROM ratings;
SELECT AVG(rating) FROM ratingsp;