{"id":3467,"date":"2021-06-20T01:32:02","date_gmt":"2021-06-19T17:32:02","guid":{"rendered":"https:\/\/wyxxt.org.cn\/?p=3467"},"modified":"2023-12-04T15:16:37","modified_gmt":"2023-12-04T07:16:37","slug":"%e6%8e%a8%e8%8d%90%e7%b3%bb%e7%bb%9f%e5%ae%9e%e6%93%8d","status":"publish","type":"post","link":"https:\/\/wyxxt.org.cn\/?p=3467","title":{"rendered":"\u63a8\u8350\u7cfb\u7edf\u5b9e\u64cd"},"content":{"rendered":"<h1>\u73af\u5883\u51c6\u5907<\/h1>\n<p><img decoding=\"async\" src=\"https:\/\/oss.wyxxt.org.cn\/images\/2021\/09\/18\/wp_editor_md_bcad5b3b5edcbd5d8c0fd711b77aef69.jpg\" alt=\"\" \/><\/p>\n<p><img decoding=\"async\" src=\"https:\/\/oss.wyxxt.org.cn\/images\/2021\/09\/18\/wp_editor_md_27c6d3c258f30c223b998f806d3f621c.jpg\" alt=\"\" \/><\/p>\n<p><img decoding=\"async\" src=\"https:\/\/oss.wyxxt.org.cn\/images\/2021\/09\/18\/wp_editor_md_fe57e4d2119eaa5fe958aa836a05df71.jpg\" alt=\"\" \/><\/p>\n<p><img decoding=\"async\" src=\"https:\/\/oss.wyxxt.org.cn\/images\/2021\/09\/18\/wp_editor_md_74cc247224f3526070bfdf707819f839.jpg\" alt=\"\" \/><\/p>\n<h1>\u6570\u636e\u9884\u5904\u7406<\/h1>\n<h2>hive\u5efa\u8868<\/h2>\n<h3>\u5e94\u7528\u8bcd\u8868\uff1a<\/h3>\n<pre><code class=\"language-sql line-numbers\">CREATE EXTERNAL TABLE IF NOT EXISTS dim_rcm_hitop_id_list_ds\n(\n    hitop_id    STRING,\n    name        STRING,\n    author      STRING,\n    sversion    STRING,\n    ischarge    SMALLINT,\n    designer    STRING,\n    font        STRING,\n    icon_count  INT,\n    stars       DOUBLE,\n    price       INT,\n    file_size   INT,\n    comment_num INT,\n    screen      STRING,\n    dlnum       INT\n)row format delimited fields terminated by '\\t';\n<\/code><\/pre>\n<h3>\u7528\u6237\u5386\u53f2\u4e0b\u8f7d\u8868\uff1a<\/h3>\n<pre><code class=\"language-sql line-numbers\">CREATE EXTERNAL TABLE IF NOT EXISTS dw_rcm_hitop_userapps_dm\n(\n    device_id           STRING,\n    devid_applist       STRING,\n    device_name         STRING,\n    pay_ability         STRING\n)row format delimited fields terminated by '\\t';\n<\/code><\/pre>\n<h3>\u6b63\u8d1f\u4f8b\u6837\u672c\u8868\uff1a<\/h3>\n<pre><code class=\"language-sql line-numbers\">CREATE EXTERNAL TABLE IF NOT EXISTS dw_rcm_hitop_sample2learn_dm\n(\n    label       STRING,\n    device_id   STRING,\n    hitop_id    STRING,\n    screen      STRING,\n    en_name     STRING,\n    ch_name     STRING,\n    author      STRING,\n    sversion    STRING,\n    mnc         STRING,\n    event_local_time STRING,\n    interface   STRING,\n    designer    STRING,\n    is_safe     INT,\n    icon_count  INT,\n    update_time STRING,\n    stars       DOUBLE,\n    comment_num INT,\n    font        STRING,\n    price       INT,\n    file_size   INT,\n    ischarge    SMALLINT,\n    dlnum       INT\n)row format delimited fields terminated by '\\t';\n<\/code><\/pre>\n<h2>load\u6570\u636e<\/h2>\n<p>\u5206\u522b\u5f80\u4e09\u5f20\u8868load\u6570\u636e\uff1a<\/p>\n<p>\u5546\u54c1\u8bcd\u8868\uff1a<\/p>\n<pre><code class=\"language-hive line-numbers\">load data local inpath '\/root\/recommender\/data\/applist.txt' into table dim_rcm_hitop_id_list_ds;\n<\/code><\/pre>\n<p>\u7528\u6237\u5386\u53f2\u4e0b\u8f7d\u8868\uff1a<\/p>\n<pre><code class=\"language-hive line-numbers\">load data local inpath '\/root\/recommender\/data\/userdownload.txt' into table dw_rcm_hitop_userapps_dm;\n<\/code><\/pre>\n<p>\u6b63\u8d1f\u4f8b\u6837\u672c\u8868\uff1a<\/p>\n<pre><code class=\"language-hive line-numbers\">load data local inpath '\/root\/recommender\/data\/sample.txt' into table dw_rcm_hitop_sample2learn_dm;\n<\/code><\/pre>\n<h2>\u6784\u5efa\u6d4b\u8bd5\u6570\u636e\u96c6<\/h2>\n<h3>\u521b\u5efa\u4e34\u65f6\u8868<\/h3>\n<p>\u5904\u7406\u6570\u636e\u65f6\u6240\u9700\u8981\u7684\u4e34\u65f6\u8868<\/p>\n<pre><code class=\"language-sql line-numbers\">CREATE TABLE IF NOT EXISTS tmp_dw_rcm_hitop_prepare2train_dm    \n(\n    device_id           STRING,\n    label               STRING,\n    hitop_id            STRING,\n    screen              STRING,\n    ch_name             STRING,\n    author              STRING,\n    sversion            STRING,\n    mnc                 STRING,\n    interface           STRING,\n    designer            STRING,\n    is_safe             INT,\n    icon_count          INT,\n    update_date         STRING,\n    stars               DOUBLE,\n    comment_num         INT,\n    font                STRING,\n    price               INT,\n    file_size           INT,\n    ischarge            SMALLINT,\n    dlnum               INT,\n    idlist              STRING,\n    device_name         STRING,\n    pay_ability         STRING\n)row format delimited fields terminated by '\\t';\n<\/code><\/pre>\n<p>\u6700\u7ec8\u4fdd\u5b58\u8bad\u7ec3\u96c6\u7684\u8868<\/p>\n<pre><code class=\"language-sql line-numbers\">CREATE TABLE IF NOT EXISTS dw_rcm_hitop_prepare2train_dm \n(\n    label                   STRING,\n    features       STRING\n)row format delimited fields terminated by '\\t';\n<\/code><\/pre>\n<h3>\u8bad\u7ec3\u6570\u636e\u9884\u5904\u7406\u8fc7\u7a0b<\/h3>\n<p>\u9996\u5148\u5c06\u6570\u636e\u4ece\u6b63\u8d1f\u4f8b\u6837\u672c\u548c\u7528\u6237\u5386\u53f2\u4e0b\u8f7d\u8868\u6570\u636e\u52a0\u8f7d\u5230\u4e34\u65f6\u8868\u4e2d<\/p>\n<pre><code class=\"language-sql line-numbers\">INSERT OVERWRITE TABLE tmp_dw_rcm_hitop_prepare2train_dm\nSELECT\n    t2.device_id,\n    t2.label,\n    t2.hitop_id,\n    t2.screen,\n    t2.ch_name,\n    t2.author,\n    t2.sversion,\n    t2.mnc,\n    t2.interface,\n    t2.designer,\n    t2.is_safe,\n    t2.icon_count,\n    to_date(t2.update_time),\n    t2.stars,\n    t2.comment_num,\n    t2.font,\n    t2.price,\n    t2.file_size,\n    t2.ischarge,\n    t2.dlnum,\n    t1.devid_applist,\n    t1.device_name,\n    t1.pay_ability\nFROM\n(\n    SELECT\n        device_id,\n        devid_applist,\n        device_name,\n        pay_ability\n    FROM\n        dw_rcm_hitop_userapps_dm\n) t1\nRIGHT OUTER JOIN\n(\n    SELECT\n        device_id,\n        label,\n        hitop_id,\n        screen,\n        ch_name,\n        author,\n        sversion,\n        IF (mnc IN ('00','01','02','03','04','05','06','07'), mnc,'x')      AS   mnc,\n        interface,\n        designer,\n        is_safe,\n        IF (icon_count &lt;= 5,icon_count,6)                                   AS   icon_count,\n        update_time,\n        stars,\n        IF ( comment_num IS NULL,0,\n        IF ( comment_num &lt;= 10,comment_num,11))                             AS   comment_num,\n        font,\n        price,\n        IF (file_size &lt;= 2*1024*1024,2,\n        IF (file_size &lt;= 4*1024*1024,4,\n        IF (file_size &lt;= 6*1024*1024,6,\n        IF (file_size &lt;= 8*1024*1024,8,\n        IF (file_size &lt;= 10*1024*1024,10,\n        IF (file_size &lt;= 12*1024*1024,12,\n        IF (file_size &lt;= 14*1024*1024,14,\n        IF (file_size &lt;= 16*1024*1024,16,\n        IF (file_size &lt;= 18*1024*1024,18,\n        IF (file_size &lt;= 20*1024*1024,20,21))))))))))    AS    file_size,\n        ischarge,\n        IF (dlnum IS NULL,0,\n        IF (dlnum &lt;= 50,50,\n        IF (dlnum &lt;= 100,100,\n        IF (dlnum &lt;= 500,500,\n        IF (dlnum &lt;= 1000,1000,\n        IF (dlnum &lt;= 5000,5000,\n        IF (dlnum &lt;= 10000,10000,\n        IF (dlnum &lt;= 20000,20000,20001))))))))          AS      dlnum\n    FROM\n        dw_rcm_hitop_sample2learn_dm\n) t2\nON (t1.device_id = t2.device_id);\n<\/code><\/pre>\n<p><img decoding=\"async\" src=\"https:\/\/oss.wyxxt.org.cn\/images\/2021\/09\/18\/wp_editor_md_8a15e594efb76b80bec2aac37ab48e04.jpg\" alt=\"\" \/><\/p>\n<p><a class=\"wp-editor-md-post-content-link\" href=\"https:\/\/github.com\/MR820\/recommonder\" title=\"\u4ee3\u7801\u5b9e\u4f8b\">\u4ee3\u7801\u5b9e\u4f8b<\/a><\/p>\n<p>\u7136\u540e\u518d\u5229\u7528python\u811a\u672c\u5904\u7406\u683c\u5f0f<br \/>\n\u8fd9\u91cc\u8981\u5148\u8bb2python\u811a\u672c\u52a0\u8f7d\u5230hive\u4e2d<\/p>\n<pre><code class=\"language-hive line-numbers\">ADD FILE \/root\/recommender\/script\/dw_rcm_hitop_prepare2train_dm.py;\n<\/code><\/pre>\n<p>\u53ef\u4ee5\u901a\u8fc7list files;\u67e5\u770b\u662f\u4e0d\u662fpython\u6587\u4ef6\u52a0\u8f7d\u5230\u4e86hive<\/p>\n<p>\u76ee\u7684\uff1a\u5c06\u5f53\u524d\u6d4f\u89c8\u7684appID\u4e0e\u7528\u6237\u5386\u53f2\u4e0b\u8f7d\u8868\u4e2d\u7684appid\u5f62\u6210\u5173\u8054\u7279\u5f81<\/p>\n<p>\u5728hive\u8bed\u53e5\u4e2d\u8c03\u7528python\u811a\u672c<br \/>\n\u5728hive\u4e2d\u4f7f\u7528python\u811a\u672c\u5904\u7406\u6570\u636e\u7684\u539f\u7406\uff1a<br \/>\nHive\u4f1a\u4ee5\u8f93\u51fa\u6d41\u7684\u5f62\u5f0f\u5c06\u6570\u636e\u4ea4\u7ed9python\u811a\u672c\uff0cpython\u811a\u672c\u4ee5\u8f93\u5165\u6d41\u7684\u5f62\u5f0f\u6765\u63a5\u53d7\u6570\u636e\uff0c\u63a5\u53d7\u6765\u6570\u636e\u4ee5\u540e\uff0c\u5728python\u4e2d\u8fdb\u884c\u4e00\u7cfb\u5217\u7684\u6570\u636e\u5904\u7406\uff0c\u5904\u7406\u5b8c\u6bd5\u540e\uff0c\u53c8\u4ee5\u8f93\u51fa\u6d41\u7684\u5f62\u5f0f\u4ea4\u7ed9Hive\uff0c\u4ea4\u7ed9\u4e86hive\u5c31\u8bf4\u660e\u4e86\u5c31\u5904\u7406\u540e\u7684\u6570\u636e\u6210\u529f\u4fdd\u5b58\u5230hive\u8868\u4e2d\u4e86<\/p>\n<pre><code class=\"language-sql line-numbers\">INSERT OVERWRITE TABLE dw_rcm_hitop_prepare2train_dm\nSELECT\nTRANSFORM (t.*)\nUSING 'python dw_rcm_hitop_prepare2train_dm.py'\nAS (label,features)\nFROM\n(\n    SELECT\n        label,\n        hitop_id,\n        screen,\n        ch_name,\n        author,\n        sversion,\n        mnc,\n        interface,\n        designer,\n        icon_count,\n        update_date,\n        stars,\n        comment_num,\n        font,\n        price,\n        file_size,\n        ischarge,\n        dlnum,\n        idlist,\n        device_name,\n        pay_ability\n    FROM\n        tmp_dw_rcm_hitop_prepare2train_dm\n) t;\n<\/code><\/pre>\n<h3>\u5bfc\u51fa\u8bad\u7ec3\u6570\u636e<\/h3>\n<p>\u5c06\u5904\u7406\u5b8c\u6210\u540e\u7684\u8bad\u7ec3\u6570\u636e\u5bfc\u51fa\u7528\u505a\u7ebf\u4e0b\u8bad\u7ec3\u7684\u6e90\u6570\u636e<\/p>\n<pre><code>\u6ce8\uff1a\u8fd9\u91cc\u662f\u5c06\u6570\u636e\u5bfc\u51fa\u5230\u672c\u5730\uff0c\u65b9\u4fbf\u540e\u9762\u518d\u672c\u5730\u6a21\u5f0f\u8dd1\u6570\u636e\uff0c\u5bfc\u51fa\u6a21\u578b\u6570\u636e\u3002\u8fd9\u91cc\u662f\u65b9\u4fbf\u6f14\u793a\u771f\u6b63\u7684\u751f\u4ea7\u73af\u5883\u662f\u76f4\u63a5\u7528\u811a\u672c\u63d0\u4ea4spark\u4efb\u52a1\uff0c\u4ecehdfs\u53d6\u6570\u636e\u7ed3\u679c\u4ecd\u7136\u5728hdfs\uff0c\u518d\u7528ETL\u5de5\u5177\u5c06\u8bad\u7ec3\u7684\u6a21\u578b\u7ed3\u679c\u6587\u4ef6\u8f93\u51fa\u5230web\u9879\u76ee\u7684\u6587\u4ef6\u76ee\u5f55\u4e0b\uff0c\u7528\u6765\u505a\u65b0\u7684\u6a21\u578b\uff0cweb\u9879\u76ee\u8bbe\u7f6e\u4e86\u5b9a\u65f6\u66f4\u65b0\u6a21\u578b\u6587\u4ef6\uff0c\u6bcf\u5929\u6309\u65f6\u8bfb\u53d6\u65b0\u6a21\u578b\u6587\u4ef6\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u73af\u5883\u51c6\u5907 \u6570\u636e\u9884\u5904\u7406 hive\u5efa\u8868 \u5e94\u7528\u8bcd\u8868\uff1a CREATE EXTERNAL TABLE IF NOT EX [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"default","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[15],"tags":[396],"class_list":["post-3467","post","type-post","status-publish","format-standard","hentry","category-15","tag-396"],"_links":{"self":[{"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/posts\/3467","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3467"}],"version-history":[{"count":8,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/posts\/3467\/revisions"}],"predecessor-version":[{"id":3475,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=\/wp\/v2\/posts\/3467\/revisions\/3475"}],"wp:attachment":[{"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3467"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3467"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/wyxxt.org.cn\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3467"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}