• hive-学习汽车销售分析


    –汽车销售日志分析–
    show databases;
    create database if not exists sale_db;
    use sale_db ;
    show tables;

    –创建数据仓库
    create external table car_table(
    province string, --省份
    months int, --月
    city string, --市
    district string, --区县
    years int, --年
    model string,–车辆型号
    manufacturer string,–制造商
    brand string, --品牌
    vehicletype string,–车辆类型
    ownership string,–所有权
    nature string, --使用性质
    quantity int,–数量
    enginemodel string,–发动机型号
    displacement int,–排量
    powers double, --功率
    fuel string,–燃料种类
    length1 int,–车长
    width1 int,–车宽
    height1 int,–车高
    length2 int,–厢长
    width2 int,–厢宽
    height2 int,–厢高
    numberofaxles int,–轴数
    wheelbase int,–轴距
    frontwheelbase int,–前轮距
    tirespecification string,–轮胎规格
    tirenumber int,–轮胎数
    totalquality int,–总质量
    completequality int,–整备质量
    approvedquality int,–核定载质量
    approvedpassenger string,–核定载客
    tractionquality int,–准牵引质量
    chassisenterprise string,–底盘企业
    chassisbrand string,–底盘品牌
    chassismodel string,–底盘型号
    engineenterprise string,–发动机企业
    vehiclename string,–车辆名称
    age int,–年龄
    gender string --性别
    )
    row format delimited
    fields terminated by ‘,’
    location ‘/data/sale’
    tblproperties(“skip.header.line.count”=“1”); --跳过文件行首1行;

    SELECT * from car_table;
    –处理空串与null
    alter table car_table set serdeproperties(‘serialization.null.format’=‘’);
    desc formatted car_table ;

    –数据分析
    –数据记录数 70640
    SELECT count(*) from car_table ;

    –乘用车辆(非营运)和商用车辆(非营运以外的)的销售数量和比例
    SELECT
    sum(if(t1.nature=‘非营运’,t1.sal_nums,0)) sycar_nums – 66478
    , sum(if(t1.nature!=‘非营运’,t1.sal_nums,0)) cycar_nums – 3884
    from (
    SELECT
    t.nature
    , COUNT(*) sal_nums
    from car_table t
    where t.nature is not null
    group by t.nature)t1 ;

    –山西省2013年每个月的汽车销售数量比例
    –1\10\11\12月份的销量比较高
    SELECT
    t.months
    , count() months_nums
    , (select count(
    ) from car_table where months is not null) years_nums
    , round(count()/(select count() from car_table where months is not null),4)*100||‘%’ months_raits
    from car_table t
    where t.months is not NULL
    group by t.months;

    –买车的男女比例和男女对车的品牌的选择
    SELECT
    t.gender
    , count()
    , (select count(
    ) from car_table where gender = ‘男性’ or gender = ‘女性’) nv_nums
    , round(count()/(select count() from car_table where gender = ‘男性’ or gender = ‘女性’),4)*100||‘%’ raite
    from car_table t
    where t.gender = ‘男性’ or t.gender = ‘女性’
    group by t.gender;

    SELECT
    t.brand
    , t.gender
    , count()
    from car_table t
    where (t.gender = ‘男性’ or t.gender = ‘女性’) and t.brand is not null
    group by t.brand , t.gender
    order by count(
    ) desc;

    –车的所有权,车辆型号,车辆类型
    SELECT
    t1.concat_str
    , count()
    from (
    SELECT
    concat(model,ownership,vehicletype) concat_str–t.model||t.ownership||t.vehicletype
    from car_table t) t1
    group by t1.concat_str
    order by count(
    ) desc;

    –不同车型在每个月的销售量
    SELECT
    t.months
    , t.vehicletype
    , count()
    from car_table t
    where t.vehicletype is not null and t.months is not null
    group by t.months ,t.vehicletype
    order by t.months ,count(
    ) desc;

    –不同品牌车销售情况,统计发动机和燃料种类 前三全部是五菱
    SELECT
    t.brand, t.enginemodel, t.fuel,count()
    from car_table t
    where t.brand is not null and t.enginemodel is not null and t.fuel is not null
    group by t.brand, t.enginemodel, t.fuel
    order by count(
    ) desc;

    –统计五菱某一年每月的销售量 仍然是1\10\11\12这几个月的销量比较好
    SELECT
    t.brand
    , t.months
    , count(*)
    from car_table t
    where t.brand = ‘五菱’
    group by t.brand ,t.months

  • 相关阅读:
    电厂三维人员定位系统的应用与优势有哪些?
    一个不用充钱也能让你变强的 VSCode 插件!!!
    SM7加密算法:安全与效率的平衡之作
    MATLAB创建avi文件
    电脑重装系统后桌面图标如何调小尺寸
    项目管理之变更管理
    ts如何使用class类?与js的class类有什么区别?
    【C语言】符号的深度理解
    2024年阿里巴巴后端开发校招面试真题汇总及其讲解(一)
    不允许还有Java程序员不了解BlockingQueue阻塞队列的实现原理
  • 原文地址:https://blog.csdn.net/LLMUZI123456789/article/details/128085839