• 首页 首页 icon
  • 工具库 工具库 icon
    • IP查询 IP查询 icon
  • 内容库 内容库 icon
    • 快讯库 快讯库 icon
    • 精品库 精品库 icon
    • 问答库 问答库 icon
  • 更多 更多 icon
    • 服务条款 服务条款 icon

用 StarRocks on ES 实现 分词

武飞扬头像
八寅老爷
帮助1

目录

部署ES

下载

Config

系统参数

启动

Verify

常规用法

创建Index

_cat

查看settings

删除index

Bulk导入数据

Search

SR 外表

测试1:分词

ES

SR


部署ES

  1.  
    sr@cs02:~/app/elasticsearch-7.16.2$grep -v ^# config/elasticsearch.yml
  2.  
    node.name: node-2
  3.  
    network.host: 172.26.194.185
  4.  
    cluster.initial_master_nodes: ["node-2"]
  • 系统参数

vm.max_map_count = 655360
  1.  
    sr@cs02:~/app/elasticsearch-7.16.2$sudo vim /etc/sysctl.conf
  2.  
    sr@cs02:~/app/elasticsearch-7.16.2$sudo sysctl -p
  3.  
    vm.swappiness = 0
  4.  
    kernel.sysrq = 1
  5.  
    net.ipv4.neigh.default.gc_stale_time = 120
  6.  
    net.ipv4.conf.all.rp_filter = 0
  7.  
    net.ipv4.conf.default.rp_filter = 0
  8.  
    net.ipv4.conf.default.arp_announce = 2
  9.  
    net.ipv4.conf.lo.arp_announce = 2
  10.  
    net.ipv4.conf.all.arp_announce = 2
  11.  
    net.ipv4.tcp_max_tw_buckets = 5000
  12.  
    net.ipv4.tcp_syncookies = 1
  13.  
    net.ipv4.tcp_max_syn_backlog = 1024
  14.  
    net.ipv4.tcp_synack_retries = 2
  15.  
    vm.max_map_count = 655360
学新通
  • 启动

sr@cs02:~/app/elasticsearch-7.16.2$bin/elasticsearch -d
  • Verify

  1.  
    sr@cs02:~/app/elasticsearch-7.16.2$sudo netstat -lnpt |grep 9[2-3]00
  2.  
    tcp6 0 0 172.26.194.185:9200 :::* LISTEN 10442/java
  3.  
    tcp6 0 0 172.26.194.185:9300 :::* LISTEN 10442/java

常规用法

  • 创建Index

  1.  
    sr@cs02:~$curl -sH "Content-Type: application/json" -XPUT "cs02:9200/test" | python -m json.tool
  2.  
    {
  3.  
    "acknowledged": true,
  4.  
    "index": "test",
  5.  
    "shards_acknowledged": true
  6.  
    }
  • _cat

  1.  
    sr@cs02:~$curl -s "cs02:9200/_cat/indices?v"
  2.  
    health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
  3.  
    yellow open test poJIPAAJQpW4hq8zXGAg3Q 1 1 0 0 226b 226b
  • 查看settings

  1.  
    sr@cs02:~$curl -sH "Content-Type: application/json" -XGET "cs02:9200/test/_settings/" | python -m json.tool
  2.  
    {
  3.  
    "test": {
  4.  
    "settings": {
  5.  
    "index": {
  6.  
    "creation_date": "1641910831087",
  7.  
    "number_of_replicas": "1",
  8.  
    "number_of_shards": "1",
  9.  
    "provided_name": "test",
  10.  
    "routing": {
  11.  
    "allocation": {
  12.  
    "include": {
  13.  
    "_tier_preference": "data_content"
  14.  
    }
  15.  
    }
  16.  
    },
  17.  
    "uuid": "JQKoH7sKRmi34chGt1n1jg",
  18.  
    "version": {
  19.  
    "created": "7160299"
  20.  
    }
  21.  
    }
  22.  
    }
  23.  
    }
  24.  
    }
学新通
  • 删除index

  1.  
    sr@cs02:~$curl -s -XDELETE "cs02:9200/test" | python -m json.tool
  2.  
    {
  3.  
    "acknowledged": true
  4.  
    }
  • Bulk导入数据

  • Note: 每行要换行
  1.  
     
  2.  
    curl -XPOST "http://cs02:9200/_bulk" -H 'Content-Type: application/json' -d'
  3.  
    {"index":{"_index":"test"}}
  4.  
    { "k1" : 1, "k2": "2022-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
  5.  
    {"index":{"_index":"test"}}
  6.  
    { "k1" : 2, "k2": "2022-01-02", "k3": "Trying out StarRocks", "k4": "Trying out StarRocks", "k5": 20.0}
  7.  
    {"index":{"_index":"test"}}
  8.  
    { "k1" : 3, "k2": "2022-01-03", "k3": "StarRocks On ES", "k4": "StarRocks On ES", "k5": 30.0}
  9.  
    {"index":{"_index":"test"}}
  10.  
    { "k1" : 4, "k2": "2022-01-04", "k3": "StarRocks", "k4": "StarRocks", "k5": 40.0}
  11.  
    {"index":{"_index":"test"}}
  12.  
    { "k1" : 5, "k2": "2022-01-05", "k3": "ES", "k4": "ES", "k5": 50.0}
  13.  
    '
  • Search

  1.  
    sr@cs02:~$curl -s -XGET cs02:9200/test/_search?pretty
  2.  
    {
  3.  
    "took" : 667,
  4.  
    "timed_out" : false,
  5.  
    "_shards" : {
  6.  
    "total" : 1,
  7.  
    "successful" : 1,
  8.  
    "skipped" : 0,
  9.  
    "failed" : 0
  10.  
    },
  11.  
    "hits" : {
  12.  
    "total" : {
  13.  
    "value" : 5,
  14.  
    "relation" : "eq"
  15.  
    },
  16.  
    "max_score" : 1.0,
  17.  
    "hits" : [
  18.  
    {
  19.  
    "_index" : "test",
  20.  
    "_type" : "_doc",
  21.  
    "_id" : "uW6rSX4B8p-MWLQevhhA",
  22.  
    "_score" : 1.0,
  23.  
    "_source" : {
  24.  
    "k1" : 1,
  25.  
    "k2" : "2022-01-01",
  26.  
    "k3" : "Trying out Elasticsearch",
  27.  
    "k4" : "Trying out Elasticsearch",
  28.  
    "k5" : 10.0
  29.  
    }
  30.  
    },
  31.  
    {
  32.  
    "_index" : "test",
  33.  
    "_type" : "_doc",
  34.  
    "_id" : "um6rSX4B8p-MWLQevhhA",
  35.  
    "_score" : 1.0,
  36.  
    "_source" : {
  37.  
    "k1" : 2,
  38.  
    "k2" : "2022-01-02",
  39.  
    "k3" : "Trying out StarRocks",
  40.  
    "k4" : "Trying out StarRocks",
  41.  
    "k5" : 20.0
  42.  
    }
  43.  
    },
  44.  
    {
  45.  
    "_index" : "test",
  46.  
    "_type" : "_doc",
  47.  
    "_id" : "u26rSX4B8p-MWLQevhhA",
  48.  
    "_score" : 1.0,
  49.  
    "_source" : {
  50.  
    "k1" : 3,
  51.  
    "k2" : "2022-01-03",
  52.  
    "k3" : "StarRocks On ES",
  53.  
    "k4" : "StarRocks On ES",
  54.  
    "k5" : 30.0
  55.  
    }
  56.  
    },
  57.  
    {
  58.  
    "_index" : "test",
  59.  
    "_type" : "_doc",
  60.  
    "_id" : "vG6rSX4B8p-MWLQevhhA",
  61.  
    "_score" : 1.0,
  62.  
    "_source" : {
  63.  
    "k1" : 4,
  64.  
    "k2" : "2022-01-04",
  65.  
    "k3" : "StarRocks",
  66.  
    "k4" : "StarRocks",
  67.  
    "k5" : 40.0
  68.  
    }
  69.  
    },
  70.  
    {
  71.  
    "_index" : "test",
  72.  
    "_type" : "_doc",
  73.  
    "_id" : "vW6rSX4B8p-MWLQevhhA",
  74.  
    "_score" : 1.0,
  75.  
    "_source" : {
  76.  
    "k1" : 5,
  77.  
    "k2" : "2022-01-05",
  78.  
    "k3" : "ES",
  79.  
    "k4" : "ES",
  80.  
    "k5" : 50.0
  81.  
    }
  82.  
    }
  83.  
    ]
  84.  
    }
  85.  
    }
学新通

SR 外表

  • 测试1:分词

  • ES

  • Create index
  1.  
    curl -sH "Content-Type: application/json" -XPUT "cs02:9200/test" -d'
  2.  
    {
  3.  
    "mappings": {
  4.  
     
  5.  
    "properties": {
  6.  
    "k1": {
  7.  
    "type": "long"
  8.  
    },
  9.  
    "k2": {
  10.  
    "type": "date"
  11.  
    },
  12.  
    "k3": {
  13.  
    "type": "keyword"
  14.  
    },
  15.  
    "k4": {
  16.  
    "analyzer": "standard",
  17.  
    "type": "text"
  18.  
    },
  19.  
    "k5": {
  20.  
    "type": "float"
  21.  
    }
  22.  
     
  23.  
    }
  24.  
    },
  25.  
    "settings": {
  26.  
    "index": {
  27.  
    "number_of_replicas": "0",
  28.  
    "number_of_shards": "1"
  29.  
    }
  30.  
    }
  31.  
    }
  32.  
    '
学新通
  • 数据导入ES
  1.  
    curl -XPOST "http://cs02:9200/_bulk" -H 'Content-Type: application/json' -d'
  2.  
    {"index":{"_index":"test"}}
  3.  
    { "k1" : 1, "k2": "2022-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
  4.  
    {"index":{"_index":"test"}}
  5.  
    { "k1" : 2, "k2": "2022-01-02", "k3": "Trying out StarRocks", "k4": "Trying out StarRocks", "k5": 20.0}
  6.  
    {"index":{"_index":"test"}}
  7.  
    { "k1" : 3, "k2": "2022-01-03", "k3": "StarRocks On ES", "k4": "StarRocks On ES", "k5": 30.0}
  8.  
    {"index":{"_index":"test"}}
  9.  
    { "k1" : 4, "k2": "2022-01-04", "k3": "StarRocks", "k4": "StarRocks", "k5": 40.0}
  10.  
    {"index":{"_index":"test"}}
  11.  
    { "k1" : 5, "k2": "2022-01-05", "k3": "ES", "k4": "ES", "k5": 50.0}
  12.  
    '
  • SR

mysql -uroot -hcs01 -P 9013
  1.  
    USE simon;
  2.  
     
  3.  
    -- 对ES中字符串类型分词类型(text) fields 进行探测
  4.  
    CREATE EXTERNAL TABLE `soe_t1` (
  5.  
    `k1` bigint(20) NULL COMMENT "",
  6.  
    `k2` datetime NULL COMMENT "",
  7.  
    `k3` varchar(20) NULL COMMENT "",
  8.  
    `k4` varchar(100) NULL COMMENT "",
  9.  
    `k5` float NULL COMMENT ""
  10.  
    ) ENGINE=ELASTICSEARCH
  11.  
    COMMENT "ELASTICSEARCH"
  12.  
    PROPERTIES (
  13.  
    "hosts" = "cs02:9200",
  14.  
    "index" = "test",
  15.  
    "type" = "_doc",
  16.  
    "transport" = "http",
  17.  
    "enable_docvalue_scan" = "true",
  18.  
    "max_docvalue_fields" = "20",
  19.  
    "enable_keyword_sniff" = "true"
  20.  
    );
学新通
  • 需要稍等几秒,同步后再查询
  1.  
    MySQL [simon]> select * from soe_t1;
  2.  
    ERROR 1064 (HY000): EsTable metadata has not been synced, Try it later
  1.  
    MySQL [simon]> desc soe_t1;
  2.  
    ------- -------------- ------ ------- --------- -------
  3.  
    | Field | Type | Null | Key | Default | Extra |
  4.  
    ------- -------------- ------ ------- --------- -------
  5.  
    | k1 | BIGINT | Yes | true | NULL | |
  6.  
    | k2 | DATETIME | Yes | true | NULL | |
  7.  
    | k3 | VARCHAR(20) | Yes | true | NULL | |
  8.  
    | k4 | VARCHAR(100) | Yes | false | NULL | NONE |
  9.  
    | k5 | FLOAT | Yes | false | NULL | NONE |
  10.  
    ------- -------------- ------ ------- --------- -------
  11.  
    5 rows in set (0.00 sec)
  12.  
     
  13.  
    mysql> select * from soe_t1;
  14.  
    ------ --------------------- -------------------------- -------------------------- ------
  15.  
    | k1 | k2 | k3 | k4 | k5 |
  16.  
    ------ --------------------- -------------------------- -------------------------- ------
  17.  
    | 1 | 2022-01-01 00:00:00 | Trying out Elasticsearch | Trying out Elasticsearch | 10 |
  18.  
    | 2 | 2022-01-02 00:00:00 | Trying out StarRocks | Trying out StarRocks | 20 |
  19.  
    | 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
  20.  
    | 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
  21.  
    | 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
  22.  
    ------ --------------------- -------------------------- -------------------------- ------
  23.  
    5 rows in set (0.01 sec)
  24.  
     
  25.  
    mysql> select * from soe_t1 where k5 > 30;
  26.  
    ------ --------------------- ----------- ----------- ------
  27.  
    | k1 | k2 | k3 | k4 | k5 |
  28.  
    ------ --------------------- ----------- ----------- ------
  29.  
    | 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
  30.  
    | 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
  31.  
    ------ --------------------- ----------- ----------- ------
  32.  
    2 rows in set (0.01 sec)
  33.  
     
  34.  
    -- 非分词列,精确匹配
  35.  
    mysql> select * from soe_t1 where k3 = 'ES';
  36.  
    ------ --------------------- ------ ------ ------
  37.  
    | k1 | k2 | k3 | k4 | k5 |
  38.  
    ------ --------------------- ------ ------ ------
  39.  
    | 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
  40.  
    ------ --------------------- ------ ------ ------
  41.  
    1 row in set (0.38 sec)
  42.  
     
  43.  
    -- 分词类型(text),按小写分词
  44.  
    mysql> select * from soe_t1 where k4 = 'es';
  45.  
    ------ --------------------- ----------------- ----------------- ------
  46.  
    | k1 | k2 | k3 | k4 | k5 |
  47.  
    ------ --------------------- ----------------- ----------------- ------
  48.  
    | 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
  49.  
    | 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
  50.  
    ------ --------------------- ----------------- ----------------- ------
  51.  
    2 rows in set (0.01 sec)
  52.  
     
  53.  
    mysql> select * from soe_t1 where k4 = 'starrocks';
  54.  
    ------ --------------------- ---------------------- ---------------------- ------
  55.  
    | k1 | k2 | k3 | k4 | k5 |
  56.  
    ------ --------------------- ---------------------- ---------------------- ------
  57.  
    | 2 | 2022-01-02 00:00:00 | Trying out StarRocks | Trying out StarRocks | 20 |
  58.  
    | 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
  59.  
    | 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
  60.  
    ------ --------------------- ---------------------- ---------------------- ------
  61.  
    3 rows in set (0.01 sec)
  62.  
     
  63.  
    -- 标准分词器,按小写分词
  64.  
    mysql> select * from soe_t1 where k4 = 'ES';
  65.  
    Empty set (0.01 sec)
  66.  
     
  67.  
    -- esquery
  68.  
    mysql> select * from soe_t1 where esquery(k4, '{
  69.  
    "match": {
  70.  
    "k4": "es"
  71.  
    }
  72.  
    }');
  73.  
    ------ --------------------- ----------------- ----------------- ------
  74.  
    | k1 | k2 | k3 | k4 | k5 |
  75.  
    ------ --------------------- ----------------- ----------------- ------
  76.  
    | 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
  77.  
    | 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
  78.  
    ------ --------------------- ----------------- ----------------- ------
  79.  
    2 rows in set (0.01 sec)
学新通

下一篇:

玩转StarRocks on ES-2-全文检索

StarRocks招聘:

招解决方案,DBA,数据库研发,测试,前后端开发等岗位,

有意者请投递简历到 hr@starrocks.com

这篇好文章是转载于:学新通技术网

  • 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
  • 本站站名: 学新通技术网
  • 本文地址: /boutique/detail/tanhggfkkf
系列文章
更多 icon
同类精品
更多 icon
继续加载