用 StarRocks on ES 实现 分词
目录
部署ES
-
sr@cs02:~/app/elasticsearch-7.16.2$grep -v ^# config/elasticsearch.yml
-
node.name: node-2
-
network.host: 172.26.194.185
-
cluster.initial_master_nodes: ["node-2"]
vm.max_map_count = 655360
-
sr@cs02:~/app/elasticsearch-7.16.2$sudo vim /etc/sysctl.conf
-
sr@cs02:~/app/elasticsearch-7.16.2$sudo sysctl -p
-
vm.swappiness = 0
-
kernel.sysrq = 1
-
net.ipv4.neigh.default.gc_stale_time = 120
-
net.ipv4.conf.all.rp_filter = 0
-
net.ipv4.conf.default.rp_filter = 0
-
net.ipv4.conf.default.arp_announce = 2
-
net.ipv4.conf.lo.arp_announce = 2
-
net.ipv4.conf.all.arp_announce = 2
-
net.ipv4.tcp_max_tw_buckets = 5000
-
net.ipv4.tcp_syncookies = 1
-
net.ipv4.tcp_max_syn_backlog = 1024
-
net.ipv4.tcp_synack_retries = 2
-
vm.max_map_count = 655360
sr@cs02:~/app/elasticsearch-7.16.2$bin/elasticsearch -d
-
sr@cs02:~/app/elasticsearch-7.16.2$sudo netstat -lnpt |grep 9[2-3]00
-
tcp6 0 0 172.26.194.185:9200 :::* LISTEN 10442/java
-
tcp6 0 0 172.26.194.185:9300 :::* LISTEN 10442/java
常规用法
-
sr@cs02:~$curl -sH "Content-Type: application/json" -XPUT "cs02:9200/test" | python -m json.tool
-
{
-
"acknowledged": true,
-
"index": "test",
-
"shards_acknowledged": true
-
}
-
sr@cs02:~$curl -s "cs02:9200/_cat/indices?v"
-
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
-
yellow open test poJIPAAJQpW4hq8zXGAg3Q 1 1 0 0 226b 226b
-
sr@cs02:~$curl -sH "Content-Type: application/json" -XGET "cs02:9200/test/_settings/" | python -m json.tool
-
{
-
"test": {
-
"settings": {
-
"index": {
-
"creation_date": "1641910831087",
-
"number_of_replicas": "1",
-
"number_of_shards": "1",
-
"provided_name": "test",
-
"routing": {
-
"allocation": {
-
"include": {
-
"_tier_preference": "data_content"
-
}
-
}
-
},
-
"uuid": "JQKoH7sKRmi34chGt1n1jg",
-
"version": {
-
"created": "7160299"
-
}
-
}
-
}
-
}
-
}
-
sr@cs02:~$curl -s -XDELETE "cs02:9200/test" | python -m json.tool
-
{
-
"acknowledged": true
-
}
- Note: 每行要换行
-
-
curl -XPOST "http://cs02:9200/_bulk" -H 'Content-Type: application/json' -d'
-
{"index":{"_index":"test"}}
-
{ "k1" : 1, "k2": "2022-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 2, "k2": "2022-01-02", "k3": "Trying out StarRocks", "k4": "Trying out StarRocks", "k5": 20.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 3, "k2": "2022-01-03", "k3": "StarRocks On ES", "k4": "StarRocks On ES", "k5": 30.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 4, "k2": "2022-01-04", "k3": "StarRocks", "k4": "StarRocks", "k5": 40.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 5, "k2": "2022-01-05", "k3": "ES", "k4": "ES", "k5": 50.0}
-
'
-
sr@cs02:~$curl -s -XGET cs02:9200/test/_search?pretty
-
{
-
"took" : 667,
-
"timed_out" : false,
-
"_shards" : {
-
"total" : 1,
-
"successful" : 1,
-
"skipped" : 0,
-
"failed" : 0
-
},
-
"hits" : {
-
"total" : {
-
"value" : 5,
-
"relation" : "eq"
-
},
-
"max_score" : 1.0,
-
"hits" : [
-
{
-
"_index" : "test",
-
"_type" : "_doc",
-
"_id" : "uW6rSX4B8p-MWLQevhhA",
-
"_score" : 1.0,
-
"_source" : {
-
"k1" : 1,
-
"k2" : "2022-01-01",
-
"k3" : "Trying out Elasticsearch",
-
"k4" : "Trying out Elasticsearch",
-
"k5" : 10.0
-
}
-
},
-
{
-
"_index" : "test",
-
"_type" : "_doc",
-
"_id" : "um6rSX4B8p-MWLQevhhA",
-
"_score" : 1.0,
-
"_source" : {
-
"k1" : 2,
-
"k2" : "2022-01-02",
-
"k3" : "Trying out StarRocks",
-
"k4" : "Trying out StarRocks",
-
"k5" : 20.0
-
}
-
},
-
{
-
"_index" : "test",
-
"_type" : "_doc",
-
"_id" : "u26rSX4B8p-MWLQevhhA",
-
"_score" : 1.0,
-
"_source" : {
-
"k1" : 3,
-
"k2" : "2022-01-03",
-
"k3" : "StarRocks On ES",
-
"k4" : "StarRocks On ES",
-
"k5" : 30.0
-
}
-
},
-
{
-
"_index" : "test",
-
"_type" : "_doc",
-
"_id" : "vG6rSX4B8p-MWLQevhhA",
-
"_score" : 1.0,
-
"_source" : {
-
"k1" : 4,
-
"k2" : "2022-01-04",
-
"k3" : "StarRocks",
-
"k4" : "StarRocks",
-
"k5" : 40.0
-
}
-
},
-
{
-
"_index" : "test",
-
"_type" : "_doc",
-
"_id" : "vW6rSX4B8p-MWLQevhhA",
-
"_score" : 1.0,
-
"_source" : {
-
"k1" : 5,
-
"k2" : "2022-01-05",
-
"k3" : "ES",
-
"k4" : "ES",
-
"k5" : 50.0
-
}
-
}
-
]
-
}
-
}
SR 外表
-
测试1:分词
-
ES
- Create index
-
curl -sH "Content-Type: application/json" -XPUT "cs02:9200/test" -d'
-
{
-
"mappings": {
-
-
"properties": {
-
"k1": {
-
"type": "long"
-
},
-
"k2": {
-
"type": "date"
-
},
-
"k3": {
-
"type": "keyword"
-
},
-
"k4": {
-
"analyzer": "standard",
-
"type": "text"
-
},
-
"k5": {
-
"type": "float"
-
}
-
-
}
-
},
-
"settings": {
-
"index": {
-
"number_of_replicas": "0",
-
"number_of_shards": "1"
-
}
-
}
-
}
-
'
- 数据导入ES
-
curl -XPOST "http://cs02:9200/_bulk" -H 'Content-Type: application/json' -d'
-
{"index":{"_index":"test"}}
-
{ "k1" : 1, "k2": "2022-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 2, "k2": "2022-01-02", "k3": "Trying out StarRocks", "k4": "Trying out StarRocks", "k5": 20.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 3, "k2": "2022-01-03", "k3": "StarRocks On ES", "k4": "StarRocks On ES", "k5": 30.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 4, "k2": "2022-01-04", "k3": "StarRocks", "k4": "StarRocks", "k5": 40.0}
-
{"index":{"_index":"test"}}
-
{ "k1" : 5, "k2": "2022-01-05", "k3": "ES", "k4": "ES", "k5": 50.0}
-
'
mysql -uroot -hcs01 -P 9013
-
USE simon;
-
-
-- 对ES中字符串类型分词类型(text) fields 进行探测
-
CREATE EXTERNAL TABLE `soe_t1` (
-
`k1` bigint(20) NULL COMMENT "",
-
`k2` datetime NULL COMMENT "",
-
`k3` varchar(20) NULL COMMENT "",
-
`k4` varchar(100) NULL COMMENT "",
-
`k5` float NULL COMMENT ""
-
) ENGINE=ELASTICSEARCH
-
COMMENT "ELASTICSEARCH"
-
PROPERTIES (
-
"hosts" = "cs02:9200",
-
"index" = "test",
-
"type" = "_doc",
-
"transport" = "http",
-
"enable_docvalue_scan" = "true",
-
"max_docvalue_fields" = "20",
-
"enable_keyword_sniff" = "true"
-
);
- 需要稍等几秒,同步后再查询
-
MySQL [simon]> select * from soe_t1;
-
ERROR 1064 (HY000): EsTable metadata has not been synced, Try it later
-
MySQL [simon]> desc soe_t1;
-
------- -------------- ------ ------- --------- -------
-
| Field | Type | Null | Key | Default | Extra |
-
------- -------------- ------ ------- --------- -------
-
| k1 | BIGINT | Yes | true | NULL | |
-
| k2 | DATETIME | Yes | true | NULL | |
-
| k3 | VARCHAR(20) | Yes | true | NULL | |
-
| k4 | VARCHAR(100) | Yes | false | NULL | NONE |
-
| k5 | FLOAT | Yes | false | NULL | NONE |
-
------- -------------- ------ ------- --------- -------
-
5 rows in set (0.00 sec)
-
-
mysql> select * from soe_t1;
-
------ --------------------- -------------------------- -------------------------- ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- -------------------------- -------------------------- ------
-
| 1 | 2022-01-01 00:00:00 | Trying out Elasticsearch | Trying out Elasticsearch | 10 |
-
| 2 | 2022-01-02 00:00:00 | Trying out StarRocks | Trying out StarRocks | 20 |
-
| 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
-
| 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
-
| 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
-
------ --------------------- -------------------------- -------------------------- ------
-
5 rows in set (0.01 sec)
-
-
mysql> select * from soe_t1 where k5 > 30;
-
------ --------------------- ----------- ----------- ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- ----------- ----------- ------
-
| 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
-
| 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
-
------ --------------------- ----------- ----------- ------
-
2 rows in set (0.01 sec)
-
-
-- 非分词列,精确匹配
-
mysql> select * from soe_t1 where k3 = 'ES';
-
------ --------------------- ------ ------ ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- ------ ------ ------
-
| 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
-
------ --------------------- ------ ------ ------
-
1 row in set (0.38 sec)
-
-
-- 分词类型(text),按小写分词
-
mysql> select * from soe_t1 where k4 = 'es';
-
------ --------------------- ----------------- ----------------- ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- ----------------- ----------------- ------
-
| 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
-
| 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
-
------ --------------------- ----------------- ----------------- ------
-
2 rows in set (0.01 sec)
-
-
mysql> select * from soe_t1 where k4 = 'starrocks';
-
------ --------------------- ---------------------- ---------------------- ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- ---------------------- ---------------------- ------
-
| 2 | 2022-01-02 00:00:00 | Trying out StarRocks | Trying out StarRocks | 20 |
-
| 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
-
| 4 | 2022-01-04 00:00:00 | StarRocks | StarRocks | 40 |
-
------ --------------------- ---------------------- ---------------------- ------
-
3 rows in set (0.01 sec)
-
-
-- 标准分词器,按小写分词
-
mysql> select * from soe_t1 where k4 = 'ES';
-
Empty set (0.01 sec)
-
-
-- esquery
-
mysql> select * from soe_t1 where esquery(k4, '{
-
"match": {
-
"k4": "es"
-
}
-
}');
-
------ --------------------- ----------------- ----------------- ------
-
| k1 | k2 | k3 | k4 | k5 |
-
------ --------------------- ----------------- ----------------- ------
-
| 3 | 2022-01-03 00:00:00 | StarRocks On ES | StarRocks On ES | 30 |
-
| 5 | 2022-01-05 00:00:00 | ES | ES | 50 |
-
------ --------------------- ----------------- ----------------- ------
-
2 rows in set (0.01 sec)
下一篇:
StarRocks招聘:
招解决方案,DBA,数据库研发,测试,前后端开发等岗位,
有意者请投递简历到 hr@starrocks.com
这篇好文章是转载于:学新通技术网
- 版权申明: 本站部分内容来自互联网,仅供学习及演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,请提供相关证据及您的身份证明,我们将在收到邮件后48小时内删除。
- 本站站名: 学新通技术网
- 本文地址: /boutique/detail/tanhggfkkf
系列文章
更多
同类精品
更多
-
photoshop保存的图片太大微信发不了怎么办
PHP中文网 06-15 -
《学习通》视频自动暂停处理方法
HelloWorld317 07-05 -
Android 11 保存文件到外部存储,并分享文件
Luke 10-12 -
word里面弄一个表格后上面的标题会跑到下面怎么办
PHP中文网 06-20 -
photoshop扩展功能面板显示灰色怎么办
PHP中文网 06-14 -
微信公众号没有声音提示怎么办
PHP中文网 03-31 -
excel下划线不显示怎么办
PHP中文网 06-23 -
excel打印预览压线压字怎么办
PHP中文网 06-22 -
TikTok加速器哪个好免费的TK加速器推荐
TK小达人 10-01 -
怎样阻止微信小程序自动打开
PHP中文网 06-13