milvus-config.yaml 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481
  1. # Licensed to the LF AI & Data foundation under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # Related configuration of etcd, used to store Milvus metadata & service discovery.
  17. etcd:
  18. # Endpoints used to access etcd service. You can change this parameter as the endpoints of your own etcd cluster.
  19. # Environment variable: ETCD_ENDPOINTS
  20. # etcd preferentially acquires valid address from environment variable ETCD_ENDPOINTS when Milvus is started.
  21. endpoints: localhost:2379
  22. # Root prefix of the key to where Milvus stores data in etcd.
  23. # It is recommended to change this parameter before starting Milvus for the first time.
  24. # To share an etcd instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them.
  25. # Set an easy-to-identify root path for Milvus if etcd service already exists.
  26. # Changing this for an already running Milvus instance may result in failures to read legacy data.
  27. rootPath: by-dev
  28. # Sub-prefix of the key to where Milvus stores metadata-related information in etcd.
  29. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  30. # It is recommended to change this parameter before starting Milvus for the first time.
  31. metaSubPath: meta
  32. # Sub-prefix of the key to where Milvus stores timestamps in etcd.
  33. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  34. # It is recommended not to change this parameter if there is no specific reason.
  35. kvSubPath: kv
  36. log:
  37. level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  38. # path is one of:
  39. # - "default" as os.Stderr,
  40. # - "stderr" as os.Stderr,
  41. # - "stdout" as os.Stdout,
  42. # - file path to append server logs to.
  43. # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log
  44. path: stdout
  45. ssl:
  46. enabled: false # Whether to support ETCD secure connection mode
  47. tlsCert: /path/to/etcd-client.pem # path to your cert file
  48. tlsKey: /path/to/etcd-client-key.pem # path to your key file
  49. tlsCACert: /path/to/ca.pem # path to your CACert file
  50. # TLS min version
  51. # Optional values: 1.0, 1.1, 1.2, 1.3.
  52. # We recommend using version 1.2 and above.
  53. tlsMinVersion: 1.3
  54. requestTimeout: 10000 # Etcd operation timeout in milliseconds
  55. dialKeepAliveTime: 3000 # Interval in milliseconds for gRPC dial keepalive pings sent to etcd endpoints.
  56. dialKeepAliveTimeout: 2000 # Timeout in milliseconds waiting for keepalive responses before marking the connection as unhealthy.
  57. use:
  58. embed: false # Whether to enable embedded Etcd (an in-process EtcdServer).
  59. data:
  60. dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/
  61. auth:
  62. enabled: false # Whether to enable authentication
  63. userName: # username for etcd authentication
  64. password: # password for etcd authentication
  65. metastore:
  66. type: etcd # Default value: etcd, Valid values: [etcd, tikv]
  67. snapshot:
  68. ttl: 86400 # snapshot ttl in seconds
  69. reserveTime: 3600 # snapshot reserve time in seconds
  70. maxEtcdTxnNum: 64 # maximum number of operations in a single etcd transaction
  71. # Related configuration of tikv, used to store Milvus metadata.
  72. # Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery.
  73. # TiKV is a good option when the metadata size requires better horizontal scalability.
  74. tikv:
  75. endpoints: 127.0.0.1:2389 # Note that the default pd port of tikv is 2379, which conflicts with etcd.
  76. rootPath: by-dev # The root path where data is stored in tikv
  77. metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath
  78. kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath
  79. requestTimeout: 10000 # ms, tikv request timeout
  80. snapshotScanSize: 256 # batch size of tikv snapshot scan
  81. ssl:
  82. enabled: false # Whether to support TiKV secure connection mode
  83. tlsCert: # path to your cert file
  84. tlsKey: # path to your key file
  85. tlsCACert: # path to your CACert file
  86. localStorage:
  87. # Local path to where vector data are stored during a search or a query to avoid repetitive access to MinIO or S3 service.
  88. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  89. # It is recommended to change this parameter before starting Milvus for the first time.
  90. path: /var/lib/milvus/data/
  91. # Related configuration of MinIO/S3/GCS or any other service that supports the S3 API, which is responsible for data persistence for Milvus.
  92. # We refer to the storage service as MinIO/S3 in the following description for simplicity.
  93. minio:
  94. # IP address of MinIO or S3 service.
  95. # Environment variable: MINIO_ADDRESS
  96. # minio.address and minio.port together generate the valid access to MinIO or S3 service.
  97. # MinIO preferentially acquires the valid IP address from the environment variable MINIO_ADDRESS when Milvus is started.
  98. # Default value applies when MinIO or S3 is running on the same network with Milvus.
  99. address: localhost:9000
  100. port: 9000 # Port of MinIO or S3 service.
  101. # Access key ID that MinIO or S3 issues to user for authorized access.
  102. # Environment variable: MINIO_ACCESS_KEY_ID or minio.accessKeyID
  103. # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service.
  104. # This configuration must be set identical to the environment variable MINIO_ACCESS_KEY_ID, which is necessary for starting MinIO or S3.
  105. # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file.
  106. accessKeyID: minioadmin
  107. # Secret key used to encrypt the signature string and verify the signature string on server. It must be kept strictly confidential and accessible only to the MinIO or S3 server and users.
  108. # Environment variable: MINIO_SECRET_ACCESS_KEY or minio.secretAccessKey
  109. # minio.accessKeyID and minio.secretAccessKey together are used for identity authentication to access the MinIO or S3 service.
  110. # This configuration must be set identical to the environment variable MINIO_SECRET_ACCESS_KEY, which is necessary for starting MinIO or S3.
  111. # The default value applies to MinIO or S3 service that started with the default docker-compose.yml file.
  112. secretAccessKey: minioadmin
  113. useSSL: false # Switch value to control if to access the MinIO or S3 service through SSL.
  114. ssl:
  115. tlsCACert: /path/to/public.crt # path to your CACert file
  116. # Name of the bucket where Milvus stores data in MinIO or S3.
  117. # Milvus 2.0.0 does not support storing data in multiple buckets.
  118. # Bucket with this name will be created if it does not exist. If the bucket already exists and is accessible, it will be used directly. Otherwise, there will be an error.
  119. # To share a MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs.
  120. # The data will be stored in the local Docker if Docker is used to start the MinIO service locally. Ensure that there is sufficient storage space.
  121. # A bucket name is globally unique in one MinIO or S3 instance.
  122. bucketName: a-bucket
  123. # Root prefix of the key to where Milvus stores data in MinIO or S3.
  124. # It is recommended to change this parameter before starting Milvus for the first time.
  125. # To share a MinIO instance among multiple Milvus instances, consider changing this to a different value for each Milvus instance before you start them. For details, see Operation FAQs.
  126. # Set an easy-to-identify root key prefix for Milvus if etcd service already exists.
  127. # Changing this for an already running Milvus instance may result in failures to read legacy data.
  128. rootPath: files
  129. # Whether to use an IAM role to access S3/GCS instead of access/secret keys
  130. # For more information, refer to
  131. # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html
  132. # gcp: https://cloud.google.com/storage/docs/access-control/iam
  133. # aliyun (ack): https://www.alibabacloud.com/help/en/container-service-for-kubernetes/latest/use-rrsa-to-enforce-access-control
  134. # aliyun (ecs): https://www.alibabacloud.com/help/en/elastic-compute-service/latest/attach-an-instance-ram-role
  135. useIAM: false
  136. # Cloud Provider of S3. Supports: "aws", "gcp", "aliyun".
  137. # Cloud Provider of Google Cloud Storage. Supports: "gcpnative".
  138. # You can use "aws" for other cloud providers that support the S3 API with signature v4, e.g.: minio
  139. # You can use "gcp" for other cloud providers that support the S3 API with signature v2
  140. # You can use "aliyun" for other cloud providers that use virtual host style buckets
  141. # You can use "gcpnative" for the Google Cloud Platform provider. Uses service account credentials
  142. # for authentication.
  143. # When useIAM enabled, only "aws", "gcp", "aliyun" is supported for now
  144. cloudProvider: aws
  145. # The JSON content contains the gcs service account credentials.
  146. # Used only for the "gcpnative" cloud provider.
  147. gcpCredentialJSON:
  148. # Custom endpoint for fetch IAM role credentials. when useIAM is true & cloudProvider is "aws".
  149. # Leave it empty if you want to use AWS default endpoint
  150. iamEndpoint:
  151. logLevel: fatal # Log level for aws sdk log. Supported level: off, fatal, error, warn, info, debug, trace
  152. region: # Specify minio storage system location region
  153. useVirtualHost: false # Whether use virtual host mode for bucket. WARNING: For Aliyun OSS and Tencent COS, this parameter is useless and is set to true by default
  154. requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
  155. # The maximum number of objects requested per batch in minio ListObjects rpc,
  156. # 0 means using the oss client default; decrease this configuration if ListObjects times out
  157. listObjectsMaxKeys: 0
  158. # Milvus supports four message queues (MQ): rocksmq (based on RocksDB), Pulsar, Kafka, and Woodpecker.
  159. # You can change the MQ by setting the mq.type field.
  160. # If the mq.type field is not set, the following priority is used when multiple MQs are configured in this file:
  161. # 1. standalone (local) mode: rocksmq (default) > Pulsar > Kafka > Woodpecker
  162. # 2. cluster mode: Pulsar (default) > Kafka (rocksmq is unsupported in cluster mode) > Woodpecker
  163. # Note: These MQ priorities are compatible with existing instances. For new instances, it is recommended to explicitly use Woodpecker to achieve better performance, operational simplicity, and cost efficiency.
  164. mq:
  165. # Default value: "default"
  166. # Valid values: [default, pulsar, kafka, rocksmq, woodpecker]
  167. type: default
  168. enablePursuitMode: true # Default value: "true"
  169. pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds
  170. pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes
  171. pursuitBufferTime: 60 # pursuit mode buffer time in seconds
  172. mqBufSize: 16 # MQ client consumer buffer length
  173. dispatcher:
  174. mergeCheckInterval: 0.1 # the interval time(in seconds) for dispatcher to check whether to merge
  175. targetBufSize: 16 # the length of channel buffer for target
  176. maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack
  177. # Related configuration of woodpecker, used to manage Milvus logs of recent mutation operations, output streaming log, and provide embedded log sequential read and write.
  178. woodpecker:
  179. meta:
  180. type: etcd # The Type of the metadata provider. currently only support etcd.
  181. prefix: woodpecker # The Prefix of the metadata provider. default is woodpecker.
  182. client:
  183. segmentAppend:
  184. queueSize: 10000 # The size of the queue for pending messages to be sent of each log.
  185. maxRetries: 3 # Maximum number of retries for segment append operations.
  186. segmentRollingPolicy:
  187. maxSize: 256M # Maximum size of a segment.
  188. maxInterval: 10m # Maximum interval between two segments, default is 10 minutes.
  189. maxBlocks: 1000 # Maximum number of blocks in a segment
  190. auditor:
  191. maxInterval: 10s # Maximum interval between two auditing operations, default is 10 seconds.
  192. logstore:
  193. segmentSyncPolicy:
  194. maxInterval: 200ms # Maximum interval between two sync operations, default is 200 milliseconds.
  195. maxIntervalForLocalStorage: 10ms # Maximum interval between two sync operations local storage backend, default is 10 milliseconds.
  196. maxBytes: 256M # Maximum size of write buffer in bytes.
  197. maxEntries: 10000 # Maximum entries number of write buffer.
  198. maxFlushRetries: 5 # Maximum number of retries for flush operations.
  199. retryInterval: 1000ms # Maximum interval between two retries. default is 1000 milliseconds.
  200. maxFlushSize: 2M # Maximum size of a fragment in bytes to flush.
  201. maxFlushThreads: 32 # Maximum number of threads to flush data
  202. segmentCompactionPolicy:
  203. maxSize: 2M # The maximum size of the merged files.
  204. maxParallelUploads: 4 # The maximum number of parallel upload threads for compaction.
  205. maxParallelReads: 8 # The maximum number of parallel read threads for compaction.
  206. segmentReadPolicy:
  207. maxBatchSize: 16M # Maximum size of a batch in bytes.
  208. maxFetchThreads: 32 # Maximum number of threads to fetch data.
  209. retentionPolicy:
  210. ttl: 72h # Time to live for truncated segments, default is 72h
  211. fencePolicy:
  212. # Enable conditional write for embedded mode, default is auto, which will automatically detect whether the storage supports conditional write.
  213. # Valid values: [auto, enable, disable]
  214. conditionWrite: auto
  215. storage:
  216. type: minio # The Type of the storage provider. Valid values: [minio, local]
  217. rootPath: default # The root path of the storage provider. If set to 'default', uses localStorage.path as base directory and creates a woodpecker subdirectory. Otherwise, specifies a custom woodpecker data storage directory.
  218. # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
  219. pulsar:
  220. # IP address of Pulsar service.
  221. # Environment variable: PULSAR_ADDRESS
  222. # pulsar.address and pulsar.port together generate the valid access to Pulsar.
  223. # Pulsar preferentially acquires the valid IP address from the environment variable PULSAR_ADDRESS when Milvus is started.
  224. # Default value applies when Pulsar is running on the same network with Milvus.
  225. address: localhost
  226. port: 6650 # Port of Pulsar service.
  227. webport: 80 # Web port of Pulsar service. If you connect directly without proxy, should use 8080.
  228. # The maximum size of each message in Pulsar. Unit: Byte.
  229. # By default, Pulsar can transmit at most 2MB of data in a single message. When the size of inserted data is greater than this value, proxy fragments the data into multiple messages to ensure that they can be transmitted correctly.
  230. # If the corresponding parameter in Pulsar remains unchanged, increasing this configuration will cause Milvus to fail, and reducing it produces no advantage.
  231. maxMessageSize: 2097152
  232. # Pulsar can be provisioned for specific tenants with appropriate capacity allocated to the tenant.
  233. # To share a Pulsar instance among multiple Milvus instances, you can change this to a Pulsar tenant rather than the default one for each Milvus instance before you start them. However, if you do not want Pulsar multi-tenancy, you are advised to change msgChannel.chanNamePrefix.cluster to a different value.
  234. tenant: public
  235. namespace: default # A Pulsar namespace is the administrative unit nomenclature within a tenant.
  236. requestTimeout: 60 # pulsar client global request timeout in seconds
  237. enableClientMetrics: true # Whether to register pulsar client metrics into milvus metrics path.
  238. # Perform a backlog cleanup every time the data of given bytes is written.
  239. # Because milvus uses a pulsar reader to read messages, there is no pulsar subscriber while milvus is running.
  240. # If the pulsar cluster enables backlog protection (backlogQuotaDefaultLimitBytes), exceeding the backlog quota will be reported and fail the write operation.
  241. # Setting this option to non-zero will create a subscription that seeks to the latest position to clear the pulsar backlog.
  242. # If this option is non-zero, the wal data in pulsar is fully protected by the retention policy,
  243. # so the pulsar admin should configure enough retention time to avoid losing wal messages.
  244. # If this option is zero, no subscription will be created, so the pulsar cluster must disable backlog protection; otherwise milvus cannot recover if the backlog is exceeded.
  245. # If this option is zero or negative, it will be ignored and the default value (100m) will be used.
  246. backlogAutoClearBytes: 100m
  247. # If you want to enable kafka, needs to comment the pulsar configs
  248. # kafka:
  249. # brokerList: localhost:9092
  250. # saslUsername:
  251. # saslPassword:
  252. # saslMechanisms:
  253. # securityProtocol:
  254. # ssl:
  255. # enabled: false # whether to enable ssl mode
  256. # tlsCert: # path to client's public key (PEM) used for authentication
  257. # tlsKey: # path to client's private key (PEM) used for authentication
  258. # tlsCaCert: # file or directory path to CA certificate(s) for verifying the broker's key
  259. # tlsKeyPassword: # private key passphrase for use with ssl.key.location and set_ssl_cert(), if any
  260. # readTimeout: 10
  261. # queuedmaxkbytes: 100000
  262. rocksmq:
  263. # Prefix of the key to where Milvus stores data in RocksMQ.
  264. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  265. # It is recommended to change this parameter before starting Milvus for the first time.
  266. # Set an easy-to-identify root key prefix for Milvus if etcd service already exists.
  267. path: /var/lib/milvus/rdb_data
  268. lrucacheratio: 0.06 # rocksdb cache memory ratio
  269. rocksmqPageSize: 67108864 # The maximum size of messages in each page in RocksMQ. Messages in RocksMQ are checked and cleared (when expired) in batch based on this parameters. Unit: Byte.
  270. retentionTimeInMinutes: 4320 # The maximum retention time of acked messages in RocksMQ. Acked messages in RocksMQ are retained for the specified period of time and then cleared. Unit: Minute.
  271. retentionSizeInMB: 8192 # The maximum retention size of acked messages of each topic in RocksMQ. Acked messages in each topic are cleared if their size exceed this parameter. Unit: MB.
  272. compactionInterval: 86400 # Time interval to trigger rocksdb compaction to remove deleted data. Unit: Second
  273. compressionTypes: 0,0,7,7,7 # compaction compression types; only 0 and 7 are supported. 0 means no compression, 7 means zstd. The number of types equals the number of rocksdb levels.
  274. # Related configuration of mixCoord
  275. mixCoord:
  276. enableActiveStandby: false
  277. # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
  278. rootCoord:
  279. dmlChannelNum: 16 # The number of DML-Channels to create at the root coord startup.
  280. # The maximum number of partitions in each collection.
  281. # New partitions cannot be created if this parameter is set as 0 or 1.
  282. # Range: [0, INT64MAX]
  283. maxPartitionNum: 1024
  284. # The minimum row count of a segment required for creating index.
  285. # Segments with smaller size than this parameter will not be indexed, and will be searched with brute force.
  286. minSegmentSizeToEnableIndex: 1024
  287. enableActiveStandby: false
  288. maxDatabaseNum: 64 # Maximum number of database
  289. maxGeneralCapacity: 65536 # upper limit for the sum of the product of partitionNumber and shardNumber
  290. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  291. ip: # TCP/IP address of rootCoord. If not specified, use the first unicastable address
  292. port: 22125 # TCP port of rootCoord
  293. grpc:
  294. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the rootCoord can send, unit: byte
  295. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the rootCoord can receive, unit: byte
  296. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on rootCoord can send, unit: byte
  297. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on rootCoord can receive, unit: byte
  298. # Related configuration of proxy, used to validate client requests and reduce the returned results.
  299. proxy:
  300. timeTickInterval: 200 # The interval at which proxy synchronizes the time tick, unit: ms.
  301. healthCheckTimeout: 3000 # ms, the interval at which the component health check is performed
  302. msgStream:
  303. timeTick:
  304. bufSize: 512 # The maximum number of messages can be buffered in the timeTick message stream of the proxy when producing messages.
  305. maxNameLength: 255 # The maximum length of the name or alias that can be created in Milvus, including the collection name, collection alias, partition name, and field name.
  306. maxFieldNum: 64 # The maximum number of fields that can be created in a collection. It is strongly DISCOURAGED to set maxFieldNum >= 64.
  307. maxVectorFieldNum: 4 # The maximum number of vector fields that can be specified in a collection
  308. maxShardNum: 16 # The maximum number of shards that can be created in a collection.
  309. maxDimension: 32768 # The maximum number of dimensions a vector can have in a collection.
  310. # Whether to produce gin logs.
  311. # please adjust in embedded Milvus: false
  312. ginLogging: true
  313. ginLogSkipPaths: / # skip url path for gin log
  314. nameValidation:
  315. allowedChars: $ # Additional characters allowed in names beyond underscores, letters and numbers. To allow hyphens in names, add '-' here.
  316. roleNameValidation:
  317. allowedChars: $ # Additional characters allowed in role names beyond underscores, letters and numbers. Add '-' to allow hyphens in role names.
  318. maxTaskNum: 1024 # The maximum number of tasks in the task queue of the proxy.
  319. ddlConcurrency: 16 # The concurrent execution number of DDL at proxy.
  320. dclConcurrency: 16 # The concurrent execution number of DCL at proxy.
  321. mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection
  322. # maximum number of result entries, typically Nq * TopK * GroupSize.
  323. # It costs additional memory and time to process a large number of result entries.
  324. # If the number of result entries exceeds this limit, the search will be rejected.
  325. # Disabled if the value is less or equal to 0.
  326. maxResultEntries: -1
  327. accessLog:
  328. enable: false # Whether to enable the access log feature.
  329. minioEnable: false # Whether to upload local access log files to MinIO. This parameter can be specified when proxy.accessLog.filename is not empty.
  330. localPath: /tmp/milvus_access # The local folder path where the access log file is stored. This parameter can be specified when proxy.accessLog.filename is not empty.
  331. filename: # The name of the access log file. If you leave this parameter empty, access logs will be printed to stdout.
  332. maxSize: 64 # The maximum size allowed for a single access log file. If the log file size reaches this limit, a rotation process will be triggered. This process seals the current access log file, creates a new log file, and clears the contents of the original log file. Unit: MB.
  333. rotatedTime: 0 # The maximum time interval allowed for rotating a single access log file. Upon reaching the specified time interval, a rotation process is triggered, resulting in the creation of a new access log file and sealing of the previous one. Unit: seconds
  334. remotePath: access_log/ # The path of the object storage for uploading access log files.
  335. remoteMaxTime: 0 # The time interval allowed for uploading access log files. If the upload time of a log file exceeds this interval, the file will be deleted. Setting the value to 0 disables this feature.
  336. formatters:
  337. base:
  338. format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost]"
  339. query:
  340. format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost] [database: $database_name] [collection: $collection_name] [partitions: $partition_name] [expr: $method_expr] [params: $query_params]"
  341. methods: "Query, Delete"
  342. search:
  343. format: "[$time_now] [ACCESS] <$user_name: $user_addr> $method_name [status: $method_status] [code: $error_code] [sdk: $sdk_version] [msg: $error_msg] [traceID: $trace_id] [timeCost: $time_cost] [database: $database_name] [collection: $collection_name] [partitions: $partition_name] [expr: $method_expr] [nq: $nq] [params: $search_params]"
  344. methods: "HybridSearch, Search"
  345. cacheSize: 0 # Size of log of write cache, in byte. (Close write cache if size was 0)
  346. cacheFlushInterval: 3 # time interval of auto flush write cache, in seconds. (Close auto flush if interval was 0)
  347. connectionCheckIntervalSeconds: 120 # the interval time(in seconds) for connection manager to scan inactive client info
  348. connectionClientInfoTTLSeconds: 86400 # inactive client info TTL duration, in seconds
  349. maxConnectionNum: 10000 # the max client info numbers that proxy should manage, avoid too many client infos
  350. gracefulStopTimeout: 30 # seconds. force stop node without graceful stop
  351. slowQuerySpanInSeconds: 5 # query whose executed time exceeds the `slowQuerySpanInSeconds` can be considered slow, in seconds.
  352. queryNodePooling:
  353. size: 10 # the size for shardleader(querynode) client pool
  354. partialResultRequiredDataRatio: 1 # partial result required data ratio, default to 1 which means disable partial result, otherwise, it will be used as the minimum data ratio for partial result
  355. http:
  356. enabled: true # Whether to enable the http server
  357. debug_mode: false # Whether to enable http server debug mode
  358. port: # high-level restful api
  359. acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64
  360. enablePprof: true # Whether to enable pprof middleware on the metrics port
  361. hstsMaxAge: 31536000 # Strict-Transport-Security max-age in seconds
  362. hstsIncludeSubDomains: false # Include subdomains in Strict-Transport-Security
  363. enableHSTS: false # Whether to enable setting the Strict-Transport-Security header
  364. enableWebUI: true # Whether to enable setting the WebUI middleware on the metrics port
  365. ip: # TCP/IP address of proxy. If not specified, use the first unicastable address
  366. port: 19530 # TCP port of proxy
  367. internalPort: 19529
  368. grpc:
  369. serverMaxSendSize: 268435456 # The maximum size of each RPC request that the proxy can send, unit: byte
  370. serverMaxRecvSize: 67108864 # The maximum size of each RPC request that the proxy can receive, unit: byte
  371. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on proxy can send, unit: byte
  372. clientMaxRecvSize: 67108864 # The maximum size of each RPC request that the clients on proxy can receive, unit: byte
  373. # Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments.
  374. queryCoord:
  375. taskMergeCap: 1
  376. taskExecutionCap: 256
  377. # Switch value to control if to automatically replace a growing segment with the corresponding indexed sealed segment when the growing segment reaches the sealing threshold.
  378. # If this parameter is set false, Milvus simply searches the growing segments with brute force.
  379. autoHandoff: true
  380. autoBalance: true # Switch value to control if to automatically balance the memory usage among query nodes by distributing segment loading and releasing operations evenly.
  381. autoBalanceChannel: true # Enable auto balance channel
  382. balancer: ScoreBasedBalancer # auto balancer used for segments on queryNodes
  383. globalRowCountFactor: 0.1 # the weight used when balancing segments among queryNodes
  384. scoreUnbalanceTolerationFactor: 0.05 # the least value for unbalanced extent between from and to nodes when doing balance
  385. reverseUnBalanceTolerationFactor: 1.3 # the largest value for unbalanced extent between from and to nodes after doing balance
  386. overloadedMemoryThresholdPercentage: 90 # The threshold of memory usage (in percentage) in a query node to trigger the sealed segment balancing.
  387. balanceIntervalSeconds: 60 # The interval at which query coord balances the memory usage among query nodes.
  388. memoryUsageMaxDifferencePercentage: 30 # The threshold of memory usage difference (in percentage) between any two query nodes to trigger the sealed segment balancing.
  389. rowCountFactor: 0.4 # the row count weight used when balancing segments among queryNodes
  390. segmentCountFactor: 0.4 # the segment count weight used when balancing segments among queryNodes
  391. globalSegmentCountFactor: 0.1 # the segment count weight used when balancing segments among queryNodes
  392. # the channel count weight used when balancing channels among queryNodes,
  393. # A higher value reduces the likelihood of assigning channels from the same collection to the same QueryNode. Set to 1 to disable this feature.
  394. collectionChannelCountFactor: 10
  395. segmentCountMaxSteps: 50 # segment count based plan generator max steps
  396. rowCountMaxSteps: 50 # row count based plan generator max steps
  397. randomMaxSteps: 10 # random plan generator max steps
  398. growingRowCountWeight: 4 # the memory weight of growing segment row count
  399. delegatorMemoryOverloadFactor: 0.1 # the factor of delegator overloaded memory
  400. balanceCostThreshold: 0.001 # the threshold of balance cost, if the difference of cluster's cost after executing the balance plan is less than this value, the plan will not be executed
  401. checkSegmentInterval: 1000
  402. checkChannelInterval: 1000
  403. checkBalanceInterval: 300
  404. autoBalanceInterval: 3000 # the interval for triggering auto balance
  405. checkIndexInterval: 10000
  406. channelTaskTimeout: 60000 # 1 minute
  407. segmentTaskTimeout: 120000 # 2 minute
  408. distPullInterval: 500
  409. heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available
  410. loadTimeoutSeconds: 600
  411. distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds
  412. heatbeatWarningLag: 5000 # the lag threshold at which querycoord reports a warning when the last heartbeat is too old, in milliseconds
  413. checkHandoffInterval: 5000
  414. enableActiveStandby: false
  415. checkInterval: 1000
  416. checkHealthInterval: 3000 # 3s, the interval when query coord try to check health of query node
  417. checkHealthRPCTimeout: 2000 # 2s, the timeout of check health rpc to query node
  418. brokerTimeout: 5000 # 5000ms, querycoord broker rpc timeout
  419. collectionRecoverTimes: 3 # if collection recover times reach the limit during loading state, release it
  420. observerTaskParallel: 16 # the parallel observer dispatcher task number
  421. checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config
  422. checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
  423. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  424. enableStoppingBalance: true # whether enable stopping balance
  425. channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode
  426. collectionObserverInterval: 200 # the interval of collection observer
  427. checkExecutedFlagInterval: 100 # the interval of check executed flag to force to pull dist
  428. updateCollectionLoadStatusInterval: 5 # 5m, max interval of updating collection loaded status for check health
  429. # Duration (in seconds) that a query node remains marked as resource exhausted after reaching resource limits.
  430. # During this period, the node won't receive new resource-loading tasks.
  431. # Set to 0 to disable the penalty period.
  432. resourceExhaustionPenaltyDuration: 30
  433. resourceExhaustionCleanupInterval: 10 # Interval (in seconds) for cleaning up expired resource exhaustion marks on query nodes.
  434. cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds
  435. ip: # TCP/IP address of queryCoord. If not specified, use the first unicastable address
  436. port: 19531 # TCP port of queryCoord
  437. grpc:
  438. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryCoord can send, unit: byte
  439. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryCoord can receive, unit: byte
  440. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryCoord can send, unit: byte
  441. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryCoord can receive, unit: byte
  442. # Related configuration of queryNode, used to run hybrid search between vector and scalar data.
  443. queryNode:
  444. stats:
  445. publishInterval: 1000 # The interval that query node publishes the node statistics information, including segment status, cpu usage, memory usage, health status, etc. Unit: ms.
  446. segcore:
  447. knowhereThreadPoolNumRatio: 4 # The number of threads in knowhere's thread pool. If disk is enabled, the pool size will multiply with knowhereThreadPoolNumRatio([1, 32]).
  448. chunkRows: 128 # Row count by which Segcore divides a segment into chunks.
  449. interimIndex:
  450. # Whether to create a temporary index for growing segments and sealed segments not yet indexed, improving search performance.
  451. # Milvus will eventually seal and index all segments, but enabling this optimizes search performance for immediate queries following data insertion.
  452. # This defaults to true, indicating that Milvus creates temporary index for growing segments and the sealed segments that are not indexed upon searches.
  453. enableIndex: true
  454. nlist: 128 # interim index nlist, recommend to set sqrt(chunkRows), must be smaller than chunkRows/8
  455. nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must be smaller than nlist
  456. subDim: 4 # interim index sub dim, recommend to satisfy (vector dim % subDim == 0)
  457. refineRatio: 4.5 # interim index parameters, should set to be >= 1.0
  458. indexBuildRatio: 0.1 # the ratio of building interim index rows count with max row count of a flush segment, should set to be < 1.0
  459. refineQuantType: NONE # Data representation of SCANN_DVR index, options: 'NONE', 'FLOAT16', 'BFLOAT16' and 'UINT8'
  460. refineWithQuant: true # whether to use refineQuantType to refine, which is faster at the cost of a little precision
  461. denseVectorIndexType: IVF_FLAT_CC # Dense vector interim index type
  462. memExpansionRate: 1.15 # extra memory needed by building interim index
  463. buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
  464. multipleChunkedEnable: true # Deprecated. Enable multiple chunked search
  465. enableGeometryCache: false # Enable geometry cache for geometry data
  466. tieredStorage:
  467. warmup:
  468. # options: sync, disable.
  469. # Specifies the timing for warming up the Tiered Storage cache.
  470. # - "sync": data will be loaded into the cache before a segment is considered loaded.
  471. # - "disable": data will not be proactively loaded into the cache, and loaded only if needed by search/query tasks.
  472. # Defaults to "sync", except for vector field which defaults to "disable".
  473. scalarField: sync
  474. scalarIndex: sync
  475. vectorField: disable # cache warmup for vector field raw data is by default disabled.
  476. vectorIndex: sync
  477. # If evictionEnabled is true, a background thread will run every evictionIntervalMs to determine if an
  478. # eviction is necessary and the amount of data to evict from memory/disk.
  479. # - If the current memory/disk usage exceeds the high watermark, an eviction will be triggered to evict data from memory/disk
  480. # until the memory/disk usage is below the low watermark.
  481. # - The max amount of memory/disk that can be used for cache is controlled by overloadedMemoryThresholdPercentage and diskMaxUsagePercentage.
  482. memoryLowWatermarkRatio: 0.75
  483. memoryHighWatermarkRatio: 0.8
  484. diskLowWatermarkRatio: 0.75
  485. diskHighWatermarkRatio: 0.8
  486. # Enable eviction for Tiered Storage. Defaults to false.
  487. # Note that if eviction is enabled, cache data loaded during sync warmup is also subject to eviction.
  488. evictionEnabled: false
  489. # Enable background eviction for Tiered Storage. Defaults to false.
  490. # Background eviction is used to do periodic eviction in a separate thread.
  491. # And it will only work when both 'evictionEnabled' and 'backgroundEvictionEnabled' are set to 'true'.
  492. backgroundEvictionEnabled: false
  493. # Time in seconds after which an unaccessed cache cell will be evicted. 'backgroundEvictionEnabled' is required.
  494. # If a cached data hasn't been accessed again after this time since its last access, it will be evicted.
  495. # If set to 0, time based eviction is disabled.
  496. cacheTtl: 0
  497. storageUsageTrackingEnabled: false # Enable storage usage tracking for Tiered Storage. Defaults to false.
  498. knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
  499. deleteDumpBatchSize: 10000 # Batch size for delete snapshot dump in segcore.
  500. loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
  501. enableDisk: false # enable querynode load disk index, and search on disk index
  502. maxDiskUsagePercentage: 95
  503. cache:
  504. memoryLimit: 2147483648 # Deprecated: 2 GB, 2 * 1024 *1024 *1024
  505. readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
  506. mmap:
  507. vectorField: true # Enable mmap for loading vector data
  508. vectorIndex: false # Enable mmap for loading vector index
  509. scalarField: false # Enable mmap for loading scalar data
  510. scalarIndex: false # Enable mmap for loading scalar index
  511. jsonShredding: true # Enable mmap for loading json stats
  512. # Enable memory mapping (mmap) to optimize the handling of growing raw data.
  513. # By activating this feature, the memory overhead associated with newly added or modified data will be significantly minimized.
  514. # However, this optimization may come at the cost of a slight increase in query latency for the affected data segments.
  515. growingMmapEnabled: false
  516. fixedFileSizeForMmapAlloc: 1 # tmp file size for mmap chunk manager
  517. maxDiskUsagePercentageForMmapAlloc: 50 # disk percentage used in mmap chunk manager
  518. lazyload:
  519. enabled: false # Enable lazyload for loading data
  520. waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve
  521. requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default
  522. requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
  523. maxRetryTimes: 1 # max retry times for lazy load, 1 by default
  524. maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
  525. indexOffsetCacheEnabled: false # enable index offset cache for some scalar indexes, now is just for bitmap index, enable this param can improve performance for retrieving raw data from index
  526. scheduler:
  527. receiveChanSize: 10240
  528. unsolvedQueueSize: 10240
  529. # maxReadConcurrentRatio is the concurrency ratio of read task (search task and query task).
  530. # Max read concurrency would be the value of hardware.GetCPUNum * maxReadConcurrentRatio.
  531. # It defaults to 1.0, which means max read concurrency would be the value of hardware.GetCPUNum.
  532. # Max read concurrency must be greater than or equal to 1, and less than or equal to hardware.GetCPUNum * 100.
  533. # (0, 100]
  534. maxReadConcurrentRatio: 1
  535. cpuRatio: 10 # ratio used to estimate read task cpu usage.
  536. maxTimestampLag: 86400
  537. scheduleReadPolicy:
  538. # fifo: A FIFO queue support the schedule.
  539. # user-task-polling:
  540. # The user's tasks will be polled one by one and scheduled.
  541. # Scheduling is fair on task granularity.
  542. # The policy is based on the username for authentication.
  543. # And an empty username is considered the same user.
  544. # When there are no multi-users, the policy decays into FIFO
  545. name: fifo
  546. taskQueueExpire: 60 # Controls how long (in seconds) a queue is retained after it becomes empty
  547. enableCrossUserGrouping: false # Enable cross-user grouping when using the user-task-polling policy. (Disable it if users' tasks cannot be merged with each other)
  548. maxPendingTaskPerUser: 1024 # Max pending task per user in scheduler
  549. grouping:
  550. maxNQ: 1000
  551. topKMergeRatio: 20
  552. levelZeroForwardPolicy: FilterByBF # delegator level zero deletion forward policy, possible option["FilterByBF", "RemoteLoad"]
  553. streamingDeltaForwardPolicy: FilterByBF # delegator streaming deletion forward policy, possible option["FilterByBF", "Direct"]
  554. forwardBatchSize: 4194304 # the batch size delegator uses for forwarding stream delete in loading procedure
  555. exprCache:
  556. enabled: false # enable expression result cache
  557. capacityBytes: 268435456 # max capacity in bytes for expression result cache
  558. dataSync:
  559. flowGraph:
  560. maxQueueLength: 16 # The maximum size of task queue cache in flow graph in query node.
  561. maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
  562. enableSegmentPrune: false # use partition stats to prune data in search/query on shard delegator
  563. queryStreamBatchSize: 4194304 # return min batch size of stream query
  564. queryStreamMaxBatchSize: 134217728 # return max batch size of stream query
  565. bloomFilterApplyParallelFactor: 2 # parallel factor when to apply pk to bloom filter, default to 2*CPU_CORE_NUM
  566. workerPooling:
  567. size: 10 # the size for worker querynode client pool
  568. idfOracle:
  569. enableDisk: true
  570. writeConcurrency: 4
  571. ip: # TCP/IP address of queryNode. If not specified, use the first unicastable address
  572. port: 21123 # TCP port of queryNode
  573. grpc:
  574. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the queryNode can send, unit: byte
  575. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the queryNode can receive, unit: byte
  576. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on queryNode can send, unit: byte
  577. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on queryNode can receive, unit: byte
  578. indexCoord:
  579. bindIndexNodeMode:
  580. enable: false
  581. address: localhost:22930
  582. withCred: false
  583. nodeID: 0
  584. segment:
  585. minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed
  586. indexNode:
  587. scheduler:
  588. buildParallel: 1
  589. dataCoord:
  590. channel:
  591. watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
  592. legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels
  593. balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing
  594. balanceInterval: 360 # The interval with which the channel manager check dml channel balance status
  595. checkInterval: 1 # The interval in seconds with which the channel manager advances channel states
  596. notifyChannelOperationTimeout: 5 # Timeout for notifying channel operations (in seconds).
  597. segment:
  598. maxSize: 1024 # The maximum size of a segment, unit: MB. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed.
  599. diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index
  600. sealProportion: 0.12 # The minimum proportion to datacoord.segment.maxSize to seal a segment. datacoord.segment.maxSize and datacoord.segment.sealProportion together determine if a segment can be sealed.
  601. sealProportionJitter: 0.1 # segment seal proportion jitter ratio, default value 0.1(10%), if seal proportion is 12%, with jitter=0.1, the actual applied ratio will be 10.8~12%
  602. assignmentExpiration: 2000 # Expiration time of the segment assignment, unit: ms
  603. allocLatestExpireAttempt: 200 # The time attempting to alloc latest lastExpire from rootCoord after restart
  604. maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60
  605. # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than
  606. # minSizeFromIdleToSealed, Milvus will automatically seal it.
  607. # The max idle time of segment in seconds, 10*60.
  608. maxIdleTime: 600
  609. minSizeFromIdleToSealed: 16 # The min size in MB of a segment which can be sealed from idle.
  610. # The max number of binlog (which is equal to the binlog file num of primary key) for one segment,
  611. # the segment will be sealed if the number of binlog file reaches to max value.
  612. maxBinlogFileNumber: 32
  613. smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than
  614. # (smallProportion * segment max # of rows).
  615. # A compaction will happen on small segments if the segment after compaction will have
  616. compactableProportion: 0.85
  617. # over (compactableProportion * segment max # of rows) rows.
  618. # MUST BE GREATER THAN OR EQUAL TO <smallProportion>!!!
  619. # During compaction, the size of segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%.
  620. expansionRate: 1.25
  621. sealPolicy:
  622. channel:
  623. # The size threshold in MB, if the total size of growing segments of each shard
  624. # exceeds this threshold, the largest growing segment will be sealed.
  625. growingSegmentsMemSize: 4096
  626. # If the total entry number of l0 logs of each shard
  627. # exceeds this threshold, the earliest growing segments will be sealed.
  628. blockingL0EntryNum: 5000000
  629. # The size threshold in MB, if the total size of l0 logs of each shard
  630. # exceeds this threshold, the earliest growing segments will be sealed.
  631. blockingL0SizeInMB: 64
  632. autoUpgradeSegmentIndex: false # whether auto upgrade segment index to index engine's version
  633. forceRebuildSegmentIndex: false # force rebuild segment index to specify index engine's version
  634. # if param forceRebuildSegmentIndex is enabled, the vector index will be rebuilt to align with targetVecIndexVersion.
  635. # if param forceRebuildSegmentIndex is not enabled, the newly created vector index will be aligned with the newer one of index engine's version and targetVecIndexVersion.
  636. # if param targetVecIndexVersion is not set, the default value is -1, which means no target vec index version, then the vector index will be aligned with index engine's version
  637. targetVecIndexVersion: -1
  638. segmentFlushInterval: 2 # the minimal interval duration(unit: Seconds) between flushing operation on same segment
  639. # Switch value to control if to enable segment compaction.
  640. # Compaction merges small-size segments into a large segment, and clears the entities deleted beyond the retention duration of Time Travel.
  641. enableCompaction: true
  642. compaction:
  643. # Switch value to control if to enable automatic segment compaction during which data coord locates and merges compactable segments in the background.
  644. # This configuration takes effect only when dataCoord.enableCompaction is set as true.
  645. enableAutoCompaction: true
  646. indexBasedCompaction: true
  647. # compaction task prioritizer, options: [default, level, mix].
  648. # default is FIFO.
  649. # level is prioritized by level: L0 compactions first, then mix compactions, then clustering compactions.
  650. # mix is prioritized by level: mix compactions first, then L0 compactions, then clustering compactions.
  651. taskPrioritizer: level
  652. taskQueueCapacity: 100000 # compaction task queue size
  653. rpcTimeout: 10
  654. maxParallelTaskNum: -1 # Deprecated, see datanode.slot.slotCap
  655. dropTolerance: 3600 # Compaction task will be cleaned after finish longer than this time(in seconds)
  656. gcInterval: 1800 # The time interval in seconds for compaction gc
  657. scheduleInterval: 500 # The time interval in milliseconds for scheduling compaction tasks. If the configuration setting is below 100ms, it will be adjusted upwards to 100ms
  658. mix:
  659. triggerInterval: 60 # The time interval in seconds to trigger mix compaction
  660. levelzero:
  661. triggerInterval: 10 # The time interval in seconds for trigger L0 compaction
  662. forceTrigger:
  663. minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, default as 8MB
  664. maxSize: 67108864 # The maximum size in bytes to force trigger a LevelZero Compaction, default as 64MB
  665. deltalogMinNum: 10 # The minimum number of deltalog files to force trigger a LevelZero Compaction
  666. deltalogMaxNum: 30 # The maximum number of deltalog files to force trigger a LevelZero Compaction, default as 30
  667. expiry:
  668. tolerance: -1 # tolerant duration in hours for expiry data, negative value means no toleration and equivalent to zero
  669. single:
  670. ratio:
  671. threshold: 0.2 # The ratio threshold of a segment to trigger a single compaction, default as 0.2
  672. deltalog:
  673. maxsize: 16777216 # The deltalog size of a segment to trigger a single compaction, default as 16MB
  674. maxnum: 200 # The deltalog count of a segment to trigger a compaction, default as 200
  675. expiredlog:
  676. maxsize: 10485760 # The expired log size of a segment to trigger a compaction, default as 10MB
  677. clustering:
  678. enable: true # Enable clustering compaction
  679. autoEnable: false # Enable auto clustering compaction
  680. triggerInterval: 600 # clustering compaction trigger interval in seconds
  681. minInterval: 3600 # The minimum interval between clustering compaction executions of one collection, to avoid redundant compaction
  682. maxInterval: 259200 # If a collection hasn't been clustering compacted for longer than maxInterval, force compact
  683. newDataSizeThreshold: 512m # If new data size is larger than newDataSizeThreshold, execute clustering compaction
  684. preferSegmentSizeRatio: 0.8
  685. maxSegmentSizeRatio: 1
  686. maxTrainSizeRatio: 0.8 # max data size ratio in Kmeans train, if larger than it, will down sampling to meet this limit
  687. maxCentroidsNum: 10240 # maximum centroids number in Kmeans train
  688. minCentroidsNum: 16 # minimum centroids number in Kmeans train
  689. minClusterSizeRatio: 0.01 # minimum cluster size / avg size in Kmeans train
  690. maxClusterSizeRatio: 10 # maximum cluster size / avg size in Kmeans train
  691. maxClusterSize: 5g # maximum cluster size in Kmeans train
  692. syncSegmentsInterval: 300 # The time interval for regularly syncing segments
  693. index:
  694. memSizeEstimateMultiplier: 2 # When the memory size is not setup by index procedure, multiplier to estimate the memory size of index data
  695. enableGarbageCollection: true # Switch value to control if to enable garbage collection to clear the discarded data in MinIO or S3 service.
  696. gc:
  697. interval: 3600 # The interval at which data coord performs garbage collection, unit: second.
  698. missingTolerance: 86400 # The retention duration of the unrecorded binary log (binlog) files. Setting a reasonably large value for this parameter avoids erroneously deleting the newly created binlog files that lack metadata. Unit: second.
  699. dropTolerance: 10800 # The retention duration of the binlog files of the deleted segments before they are cleared, unit: second.
  700. scanInterval: 168 # orphan file (file on oss but has not been registered on meta) on object storage garbage collection scanning interval in hours
  701. slowDownCPUUsageThreshold: 0.6 # The CPU usage threshold at which the garbage collection will be slowed down
  702. enableActiveStandby: false
  703. brokerTimeout: 5000 # 5000ms, dataCoord broker rpc timeout
  704. autoBalance: true # Enable auto balance
  705. checkAutoBalanceConfigInterval: 10 # the interval of check auto balance config
  706. import:
  707. filesPerPreImportTask: 2 # The maximum number of files allowed per pre-import task.
  708. taskRetention: 10800 # The retention period in seconds for tasks in the Completed or Failed state.
  709. maxSizeInMBPerImportTask: 16384 # To prevent generating of small segments, we will re-group imported files. This parameter represents the sum of file sizes in each group (each ImportTask).
  710. scheduleInterval: 2 # The interval for scheduling import, measured in seconds.
  711. checkIntervalHigh: 2 # The interval for checking import, measured in seconds, is set to a high frequency for the import checker.
  712. checkIntervalLow: 120 # The interval for checking import, measured in seconds, is set to a low frequency for the import checker.
  713. maxImportFileNumPerReq: 1024 # The maximum number of files allowed per single import request.
  714. maxImportJobNum: 1024 # Maximum number of import jobs that are executing or pending.
  715. waitForIndex: true # Indicates whether the import operation waits for the completion of index building.
  716. fileNumPerSlot: 1 # The files number per slot for pre-import/import task.
  717. memoryLimitPerSlot: 160 # The memory limit (in MB) of buffer size per slot for pre-import/import task.
  718. gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  719. slot:
  720. clusteringCompactionUsage: 65535 # slot usage of clustering compaction task, setting it to 65535 means it takes up a whole worker.
  721. mixCompactionUsage: 4 # slot usage of mix compaction task.
  722. l0DeleteCompactionUsage: 8 # slot usage of l0 compaction task.
  723. indexTaskSlotUsage: 64 # slot usage of index task per 512mb
  724. scalarIndexTaskSlotUsage: 16 # slot usage of scalar index task per 512mb
  725. statsTaskSlotUsage: 8 # slot usage of stats task per 512mb
  726. analyzeTaskSlotUsage: 65535 # slot usage of analyze task
  727. jsonShreddingTriggerCount: 10 # jsonkey stats task count per trigger
  728. jsonShreddingTriggerInterval: 10 # jsonkey task interval per trigger
  729. jsonShreddingMaxColumns: 1024 # the max number of columns to shred
  730. jsonShreddingRatioThreshold: 0.3 # the ratio threshold to shred
  731. jsonShreddingWriteBatchSize: 81920 # the batch size to write
  732. ip: # TCP/IP address of dataCoord. If not specified, use the first unicastable address
  733. port: 13333 # TCP port of dataCoord
  734. grpc:
  735. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataCoord can send, unit: byte
  736. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataCoord can receive, unit: byte
  737. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataCoord can send, unit: byte
  738. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataCoord can receive, unit: byte
  739. dataNode:
  740. dataSync:
  741. flowGraph:
  742. maxQueueLength: 16 # Maximum length of task queue in flowgraph
  743. maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
  744. maxParallelSyncMgrTasksPerCPUCore: 16 # The max concurrent sync task number of datanode sync mgr per CPU core
  745. skipMode:
  746. enable: true # Support skip some timetick message to reduce CPU usage
  747. skipNum: 4 # Consume one for every n records skipped
  748. coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds
  749. segment:
  750. # The maximum size of each binlog file in a segment buffered in memory. Binlog files whose size exceeds this value are then flushed to MinIO or S3 service.
  751. # Unit: Byte
  752. # Setting this parameter too small causes the system to store a small amount of data too frequently. Setting it too large increases the system's demand for memory.
  753. insertBufSize: 16777216
  754. deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB
  755. syncPeriod: 600 # The period to sync segments if buffer is not empty.
  756. memory:
  757. forceSyncEnable: true # Set true to force sync if memory usage is too high
  758. forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced.
  759. checkInterval: 3000 # the interval to check datanode memory usage, in milliseconds
  760. forceSyncWatermark: 0.5 # memory watermark for standalone, upon reaching this watermark, segments will be synced.
  761. timetick:
  762. interval: 500
  763. channel:
  764. # specify the size of global work pool of all channels
  765. # if this parameter <= 0, will set it as the maximum number of CPUs that can be executing
  766. # suggest to set it bigger on large collection numbers to avoid blocking
  767. workPoolSize: -1
  768. # specify the size of global work pool for channel checkpoint updating
  769. # if this parameter <= 0, will set it as 10
  770. updateChannelCheckpointMaxParallel: 10
  771. updateChannelCheckpointInterval: 60 # the interval duration(in seconds) for datanode to update channel checkpoint of each channel
  772. updateChannelCheckpointRPCTimeout: 20 # timeout in seconds for UpdateChannelCheckpoint RPC call
  773. maxChannelCheckpointsPerPRC: 128 # The maximum number of channel checkpoints per UpdateChannelCheckpoint RPC.
  774. channelCheckpointUpdateTickInSeconds: 10 # The frequency, in seconds, at which the channel checkpoint updater executes updates.
  775. import:
  776. concurrencyPerCPUCore: 4 # The execution concurrency unit for import/pre-import tasks per CPU core.
  777. maxImportFileSizeInGB: 16 # The maximum file size (in GB) for an import file, where an import file refers to either a Row-Based file or a set of Column-Based files.
  778. readBufferSizeInMB: 16 # The base insert buffer size (in MB) during import. The actual buffer size will be dynamically calculated based on the number of shards.
  779. readDeleteBufferSizeInMB: 16 # The delete buffer size (in MB) during import.
  780. memoryLimitPercentage: 10 # The percentage of memory limit for import/pre-import tasks.
  781. compaction:
  782. levelZeroBatchMemoryRatio: 0.5 # The minimal memory ratio of free memory for level zero compaction executing in batch mode
  783. levelZeroMaxBatchSize: -1 # Max batch size refers to the max number of L1/L2 segments in a batch when executing L0 compaction. Default to -1, any value that is less than 1 means no limit. Valid range: >= 1.
  784. useMergeSort: true # Whether to enable mergeSort mode when performing mixCompaction.
  785. maxSegmentMergeSort: 30 # The maximum number of segments to be merged in mergeSort mode.
  786. gracefulStopTimeout: 1800 # seconds. force stop node without graceful stop
  787. slot:
  788. slotCap: 16 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode
  789. clusteringCompaction:
  790. memoryBufferRatio: 0.3 # The ratio of memory buffer of clustering compaction. Data larger than threshold will be flushed to storage.
  791. workPoolSize: 8 # worker pool size for one clustering compaction job.
  792. bloomFilterApplyParallelFactor: 2 # parallel factor used when applying pk to the bloom filter, default to 2*CPU_CORE_NUM
  793. storage:
  794. deltalog: json # deltalog format, options: [json, parquet]
  795. ip: # TCP/IP address of dataNode. If not specified, use the first unicastable address
  796. port: 21124 # TCP port of dataNode
  797. grpc:
  798. serverMaxSendSize: 536870912 # The maximum size of each RPC request that the dataNode can send, unit: byte
  799. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the dataNode can receive, unit: byte
  800. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on dataNode can send, unit: byte
  801. clientMaxRecvSize: 536870912 # The maximum size of each RPC request that the clients on dataNode can receive, unit: byte
  802. # This topic introduces the message channel-related configurations of Milvus.
  803. msgChannel:
  804. chanNamePrefix:
  805. # Root name prefix of the channel when a message channel is created.
  806. # It is recommended to change this parameter before starting Milvus for the first time.
  807. # To share a Pulsar instance among multiple Milvus instances, consider changing this to a name rather than the default one for each Milvus instance before you start them.
  808. cluster: by-dev
  809. # Sub-name prefix of the message channel where the root coord publishes time tick messages.
  810. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordTimeTick}
  811. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  812. # It is recommended to change this parameter before starting Milvus for the first time.
  813. rootCoordTimeTick: rootcoord-timetick
  814. # Sub-name prefix of the message channel where the root coord publishes its own statistics messages.
  815. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordStatistics}
  816. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  817. # It is recommended to change this parameter before starting Milvus for the first time.
  818. rootCoordStatistics: rootcoord-statistics
  819. # Sub-name prefix of the message channel where the root coord publishes Data Manipulation Language (DML) messages.
  820. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.rootCoordDml}
  821. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  822. # It is recommended to change this parameter before starting Milvus for the first time.
  823. rootCoordDml: rootcoord-dml
  824. replicateMsg: replicate-msg
  825. # Sub-name prefix of the message channel where the query node publishes time tick messages.
  826. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.queryTimeTick}
  827. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  828. # It is recommended to change this parameter before starting Milvus for the first time.
  829. queryTimeTick: queryTimeTick
  830. # Sub-name prefix of the message channel where the data coord publishes time tick messages.
  831. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordTimeTick}
  832. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  833. # It is recommended to change this parameter before starting Milvus for the first time.
  834. dataCoordTimeTick: datacoord-timetick-channel
  835. # Sub-name prefix of the message channel where the data coord publishes segment information messages.
  836. # The complete channel name prefix is ${msgChannel.chanNamePrefix.cluster}-${msgChannel.chanNamePrefix.dataCoordSegmentInfo}
  837. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  838. # It is recommended to change this parameter before starting Milvus for the first time.
  839. dataCoordSegmentInfo: segment-info-channel
  840. subNamePrefix:
  841. # Subscription name prefix of the data coord.
  842. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  843. # It is recommended to change this parameter before starting Milvus for the first time.
  844. dataCoordSubNamePrefix: dataCoord
  845. # Subscription name prefix of the data node.
  846. # Caution: Changing this parameter after using Milvus for a period of time will affect your access to old data.
  847. # It is recommended to change this parameter before starting Milvus for the first time.
  848. dataNodeSubNamePrefix: dataNode
  849. # Configures the system log output.
  850. log:
  851. # Milvus log level. Option: trace, debug, info, warn, error, panic, and fatal.
  852. # It is recommended to use debug level under test and development environments, and info level in production environment.
  853. level: info
  854. file:
  855. # Root path to the log files.
  856. # The default value is set empty, indicating to output log files to standard output (stdout) and standard error (stderr).
  857. # If this parameter is set to a valid local path, Milvus writes and stores log files in this path.
  858. # Set this parameter as the path that you have permission to write.
  859. rootPath:
  860. maxSize: 300 # The maximum size of a log file, unit: MB.
  861. maxAge: 10 # The maximum retention time before a log file is automatically cleared, unit: day. The minimum value is 1.
  862. maxBackups: 20 # The maximum number of log files to back up. The minimum value is 1.
  863. format: text # Milvus log format. Option: text and JSON
  864. stdout: true # Stdout enable or not
  865. grpc:
  866. log:
  867. level: WARNING
  868. gracefulStopTimeout: 3 # seconds, time to wait for graceful stop to finish
  869. client:
  870. compressionEnabled: false
  871. dialTimeout: 200
  872. keepAliveTime: 10000
  873. keepAliveTimeout: 20000
  874. maxMaxAttempts: 10
  875. initialBackoff: 0.2
  876. maxBackoff: 10
  877. backoffMultiplier: 2
  878. minResetInterval: 1000
  879. maxCancelError: 32
  880. minSessionCheckInterval: 200
  881. # Configure external tls.
  882. tls:
  883. serverPemPath: configs/cert/server.pem
  884. serverKeyPath: configs/cert/server.key
  885. caPemPath: configs/cert/ca.pem
  886. # Configure internal tls.
  887. internaltls:
  888. serverPemPath: configs/cert/server.pem
  889. serverKeyPath: configs/cert/server.key
  890. caPemPath: configs/cert/ca.pem
  891. sni: localhost # The server name indication (SNI) for internal TLS, should be the same as the name provided by the certificates ref: https://en.wikipedia.org/wiki/Server_Name_Indication
  892. common:
  893. defaultPartitionName: _default # Name of the default partition when a collection is created
  894. defaultIndexName: _default_idx # Name of the index when it is created with name unspecified
  895. entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
  896. indexSliceSize: 16 # Index slice size in MB
  897. threadCoreCoefficient:
  898. highPriority: 10 # This parameter specifies the number of threads in the high priority pool as a multiple of the number of cores
  899. middlePriority: 5 # This parameter specifies the number of threads in the middle priority pool as a multiple of the number of cores
  900. lowPriority: 1 # This parameter specifies the number of threads in the low priority pool as a multiple of the number of cores
  901. buildIndexThreadPoolRatio: 0.75
  902. DiskIndex:
  903. MaxDegree: 56
  904. SearchListSize: 100
  905. PQCodeBudgetGBRatio: 0.125
  906. BuildNumThreadsRatio: 1
  907. SearchCacheBudgetGBRatio: 0.1
  908. LoadNumThreadRatio: 8
  909. BeamWidthRatio: 4
  910. gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency.
  911. gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time.
  912. namespace:
  913. enabled: false # whether to enable namespace, this parameter may be deprecated in the future. Just keep it for compatibility.
  914. storageType: remote # please adjust in embedded Milvus: local, available values are [local, remote, opendal], value minio is deprecated, use remote instead
  915. # Default value: auto
  916. # Valid values: [auto, avx512, avx2, avx, sse4_2]
  917. # This configuration is only used by querynode and indexnode, it selects CPU instruction set for Searching and Index-building.
  918. simdType: auto
  919. # This parameter controls the write mode of the local disk, which is used to write temporary data downloaded from remote storage.
  920. # Currently, only QueryNode uses 'common.diskWrite*' parameters. Support for other components will be added in the future.
  921. # The options include 'direct' and 'buffered'. The default value is 'buffered'.
  922. diskWriteMode: buffered
  923. # Disk write buffer size in KB, used for both 'direct' and 'buffered' modes, default is 64KB.
  924. # Current valid range is [4, 65536]. If the value is not aligned to 4KB, it will be rounded up to the nearest multiple of 4KB.
  925. diskWriteBufferSizeKb: 64
  926. # This parameter controls the number of writer threads used for disk write operations. The valid range is [0, hardware_concurrency].
  927. # It is designed to limit the maximum concurrency of disk write operations to reduce the impact on disk read performance.
  928. # For example, if you want to limit the maximum concurrency of disk write operations to 1, you can set this parameter to 1.
  929. # The default value is 0, which means the caller will perform write operations directly without using an additional writer thread pool.
  930. # In this case, the maximum concurrency of disk write operations is determined by the caller's thread pool size.
  931. diskWriteNumThreads: 0
  932. diskWriteRateLimiter:
  933. refillPeriodUs: 100000 # refill period in microseconds if disk rate limiter is enabled, default is 100000us (100ms)
  934. avgKBps: 262144 # average kilobytes per second if disk rate limiter is enabled, default is 262144KB/s (256MB/s)
  935. maxBurstKBps: 524288 # max burst kilobytes per second if disk rate limiter is enabled, default is 524288KB/s (512MB/s)
  936. # amplification ratio for high priority tasks if disk rate limiter is enabled, value <= 0 means ratio limit is disabled.
  937. # The ratio is the multiplication factor of the configured bandwidth.
  938. # For example, if the rate limit is 100KB/s, and the high priority ratio is 2, then the high priority tasks will be limited to 200KB/s.
  939. highPriorityRatio: -1
  940. middlePriorityRatio: -1 # amplification ratio for middle priority tasks if disk rate limiter is enabled, value <= 0 means ratio limit is disabled
  941. lowPriorityRatio: -1 # amplification ratio for low priority tasks if disk rate limiter is enabled, value <= 0 means ratio limit is disabled
  942. security:
  943. authorizationEnabled: false
  944. # The superusers will ignore some system check processes,
  945. # like the old password verification when updating the credential
  946. superUsers:
  947. # default password for root user. The maximum length is 72 characters.
  948. # Large numeric passwords require double quotes to avoid yaml parsing precision issues.
  949. defaultRootPassword: Milvus
  950. rootShouldBindRole: false # Whether the root user should bind a role when the authorization is enabled.
  951. enablePublicPrivilege: true # Whether to enable public privilege
  952. rbac:
  953. overrideBuiltInPrivilegeGroups:
  954. enabled: false # Whether to override built-in privilege groups
  955. cluster:
  956. readonly:
  957. privileges: ListDatabases,SelectOwnership,SelectUser,DescribeResourceGroup,ListResourceGroups,ListPrivilegeGroups # Cluster level readonly privileges
  958. readwrite:
  959. privileges: ListDatabases,SelectOwnership,SelectUser,DescribeResourceGroup,ListResourceGroups,ListPrivilegeGroups,FlushAll,TransferNode,TransferReplica,UpdateResourceGroups # Cluster level readwrite privileges
  960. admin:
  961. privileges: ListDatabases,SelectOwnership,SelectUser,DescribeResourceGroup,ListResourceGroups,ListPrivilegeGroups,FlushAll,TransferNode,TransferReplica,UpdateResourceGroups,BackupRBAC,RestoreRBAC,CreateDatabase,DropDatabase,CreateOwnership,DropOwnership,ManageOwnership,CreateResourceGroup,DropResourceGroup,UpdateUser,RenameCollection,CreatePrivilegeGroup,DropPrivilegeGroup,OperatePrivilegeGroup,UpdateReplicateConfiguration # Cluster level admin privileges
  962. database:
  963. readonly:
  964. privileges: ShowCollections,DescribeDatabase # Database level readonly privileges
  965. readwrite:
  966. privileges: ShowCollections,DescribeDatabase,AlterDatabase # Database level readwrite privileges
  967. admin:
  968. privileges: ShowCollections,DescribeDatabase,AlterDatabase,CreateCollection,DropCollection # Database level admin privileges
  969. collection:
  970. readonly:
  971. privileges: Query,Search,IndexDetail,GetFlushState,GetLoadState,GetLoadingProgress,HasPartition,ShowPartitions,DescribeCollection,DescribeAlias,GetStatistics,ListAliases,GetImportProgress,ListImport # Collection level readonly privileges
  972. readwrite:
  973. privileges: Query,Search,IndexDetail,GetFlushState,GetLoadState,GetLoadingProgress,HasPartition,ShowPartitions,DescribeCollection,DescribeAlias,GetStatistics,ListAliases,GetImportProgress,ListImport,Load,Release,Insert,Delete,Upsert,Import,Flush,Compaction,LoadBalance,CreateIndex,DropIndex,CreatePartition,DropPartition,AddCollectionField # Collection level readwrite privileges
  974. admin:
  975. privileges: Query,Search,IndexDetail,GetFlushState,GetLoadState,GetLoadingProgress,HasPartition,ShowPartitions,DescribeCollection,DescribeAlias,GetStatistics,ListAliases,GetImportProgress,ListImport,Load,Release,Insert,Delete,Upsert,Import,Flush,Compaction,LoadBalance,CreateIndex,DropIndex,CreatePartition,DropPartition,AddCollectionField,CreateAlias,DropAlias # Collection level admin privileges
  976. internaltlsEnabled: false
  977. tlsMode: 0
  978. session:
  979. ttl: 15 # ttl value when session granting a lease to register service
  980. retryTimes: 30 # retry times when session sending etcd requests
  981. estimate:
  982. varCharLengthAvg: 256 # average length considered per VarChar/Text field when estimating record size
  983. dynamicFieldLengthAvg: 512 # average length considered per JSON/Array/Geometry field when estimating record size
  984. sparseFloatVectorSize: 1200 # fallback size (bytes) used when estimating sparse float vector fields
  985. locks:
  986. metrics:
  987. enable: false # whether to gather statistics for metrics locks
  988. threshold:
  989. info: 500 # minimum milliseconds for printing durations in info level
  990. warn: 1000 # minimum milliseconds for printing durations in warn level
  991. maxWLockConditionalWaitTime: 600 # maximum seconds for waiting wlock conditional
  992. storage:
  993. enablev2: true
  994. stv2:
  995. splitSystemColumn:
  996. enabled: true # enable split system column policy in storage v2
  997. includePK: true # whether split system column policy include pk field
  998. splitByAvgSize:
  999. enabled: false # enable split by average size policy in storage v2
  1000. threshold: 1024 # split by average size policy threshold(in bytes) in storage v2
  1001. useLoonFFI: false
  1002. # Whether to enable the internal time messaging mechanism for the system.
  1003. # If disabled (set to false), the system will not allow DML operations, including insertion, deletion, queries, and searches.
  1004. # This helps Milvus-CDC synchronize incremental data
  1005. ttMsgEnabled: true
  1006. traceLogMode: 0 # trace request info
  1007. bloomFilterSize: 100000 # bloom filter initial size
  1008. bloomFilterType: BlockedBloomFilter # bloom filter type, support BasicBloomFilter and BlockedBloomFilter
  1009. maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter
  1010. bloomFilterApplyBatchSize: 1000 # batch size used when applying pk to the bloom filter
  1011. collectionReplicateEnable: false # Whether to enable collection replication.
  1012. usePartitionKeyAsClusteringKey: false # if true, do clustering compaction and segment prune on partition key field
  1013. useVectorAsClusteringKey: false # if true, do clustering compaction and segment prune on vector field
  1014. enableVectorClusteringKey: false # if true, enable vector clustering key and vector clustering compaction
  1015. localRPCEnabled: false # enable local rpc for internal communication in mix or standalone mode.
  1016. sync:
  1017. taskPoolReleaseTimeoutSeconds: 60 # The maximum time to wait for the task to finish and release resources in the pool
  1018. enabledOptimizeExpr: true # Indicates whether to enable optimize expr
  1019. enabledJSONShredding: false # Indicates whether to enable JSON key stats for sealed segments
  1020. enabledGrowingSegmentJSONShredding: false # Indicates whether to enable JSON key stats for growing segments
  1021. enableConfigParamTypeCheck: true # Indicates whether to enable config param type check
  1022. enablePosixMode: false # Specifies whether to run in POSIX mode for enhanced file system compatibility
  1023. usingJSONShreddingForQuery: true # Indicates whether to use json stats when query
  1024. clusterID: 0 # cluster id
  1025. # QuotaConfig, configurations of Milvus quota and limits.
  1026. # By default, we enable:
  1027. # 1. TT protection;
  1028. # 2. Memory protection.
  1029. # 3. Disk quota protection.
  1030. # You can enable:
  1031. # 1. DML throughput limitation;
  1032. # 2. DDL, DQL qps/rps limitation;
  1033. # 3. DQL Queue length/latency protection;
  1034. # 4. DQL result rate protection;
  1035. # If necessary, you can also manually force to deny RW requests.
  1036. quotaAndLimits:
  1037. enabled: true # `true` to enable quota and limits, `false` to disable.
  1038. # quotaCenterCollectInterval is the time interval that quotaCenter
  1039. # collects metrics from Proxies, Query cluster and Data cluster.
  1040. # seconds, (0 ~ 65536)
  1041. quotaCenterCollectInterval: 3
  1042. # FactorChangeThreshold defines the minimum relative change in factor to trigger an update.
  1043. # If the factor change is less than this threshold (e.g., 5%), the update is skipped
  1044. # to reduce unnecessary proxy updates. Range: (0, 1]
  1045. factorChangeThreshold: 0.05
  1046. forceDenyAllDDL: false # true to force deny all DDL requests, false to allow.
  1047. limits:
  1048. allocRetryTimes: 15 # retry times when delete alloc forward data from rate limit failed
  1049. allocWaitInterval: 1000 # retry wait duration when delete alloc forward data rate failed, in millisecond
  1050. complexDeleteLimitEnable: false # whether complex delete check forward data by limiter
  1051. maxCollectionNum: 65536
  1052. maxCollectionNumPerDB: 65536 # Maximum number of collections per database.
  1053. maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit
  1054. maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes
  1055. maxGroupSize: 10 # maximum size for one single group when doing search group by
  1056. ddl:
  1057. enabled: false # Whether DDL request throttling is enabled.
  1058. # Maximum number of collection-related DDL requests per second.
  1059. # Setting this item to 10 indicates that Milvus processes no more than 10 collection-related DDL requests per second, including collection creation requests, collection drop requests, collection load requests, and collection release requests.
  1060. # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time.
  1061. collectionRate: -1
  1062. # Maximum number of partition-related DDL requests per second.
  1063. # Setting this item to 10 indicates that Milvus processes no more than 10 partition-related requests per second, including partition creation requests, partition drop requests, partition load requests, and partition release requests.
  1064. # To use this setting, set quotaAndLimits.ddl.enabled to true at the same time.
  1065. partitionRate: -1
  1066. db:
  1067. collectionRate: -1 # qps of db level , default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
  1068. partitionRate: -1 # qps of db level, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
  1069. indexRate:
  1070. enabled: false # Whether index-related request throttling is enabled.
  1071. # Maximum number of index-related requests per second.
  1072. # Setting this item to 10 indicates that Milvus processes no more than 10 index-related requests per second, including index creation requests and index drop requests.
  1073. # To use this setting, set quotaAndLimits.indexRate.enabled to true at the same time.
  1074. max: -1
  1075. db:
  1076. max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex
  1077. flushRate:
  1078. enabled: true # Whether flush request throttling is enabled.
  1079. # Maximum number of flush requests per second.
  1080. # Setting this item to 10 indicates that Milvus processes no more than 10 flush requests per second.
  1081. # To use this setting, set quotaAndLimits.flushRate.enabled to true at the same time.
  1082. max: -1
  1083. collection:
  1084. max: 0.1 # qps, default no limit, rate for flush at collection level.
  1085. db:
  1086. max: -1 # qps of db level, default no limit, rate for flush
  1087. compactionRate:
  1088. enabled: false # Whether manual compaction request throttling is enabled.
  1089. # Maximum number of manual-compaction requests per second.
  1090. # Setting this item to 10 indicates that Milvus processes no more than 10 manual-compaction requests per second.
  1091. # To use this setting, set quotaAndLimits.compactionRate.enabled to true at the same time.
  1092. max: -1
  1093. db:
  1094. max: -1 # qps of db level, default no limit, rate for manualCompaction
  1095. dbRate:
  1096. enabled: false # Whether DB request throttling is enabled
  1097. # Maximum number of db-related requests per second.
  1098. # Setting this item to 10 indicates that Milvus processes no more than 10 db-related requests per second, including db creation/drop/alter requests.
  1099. # To use this setting, set quotaAndLimits.dbRate.enabled to true at the same time.
  1100. #
  1101. max: -1
  1102. dml:
  1103. enabled: false # Whether DML request throttling is enabled.
  1104. insertRate:
  1105. # Highest data insertion rate per second.
  1106. # Setting this item to 5 indicates that Milvus only allows data insertion at the rate of 5 MB/s.
  1107. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  1108. max: -1
  1109. db:
  1110. max: -1 # MB/s, default no limit
  1111. collection:
  1112. # Highest data insertion rate per collection per second.
  1113. # Setting this item to 5 indicates that Milvus only allows data insertion to any collection at the rate of 5 MB/s.
  1114. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  1115. max: -1
  1116. partition:
  1117. max: -1 # MB/s, default no limit
  1118. deleteRate:
  1119. # Highest data deletion rate per second.
  1120. # Setting this item to 0.1 indicates that Milvus only allows data deletion at the rate of 0.1 MB/s.
  1121. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  1122. max: -1
  1123. db:
  1124. max: -1 # MB/s, default no limit
  1125. collection:
  1126. # Highest data deletion rate per second.
  1127. # Setting this item to 0.1 indicates that Milvus only allows data deletion from any collection at the rate of 0.1 MB/s.
  1128. # To use this setting, set quotaAndLimits.dml.enabled to true at the same time.
  1129. max: -1
  1130. partition:
  1131. max: -1 # MB/s, default no limit
  1132. bulkLoadRate:
  1133. max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
  1134. db:
  1135. max: -1 # MB/s, default no limit, not support yet. TODO: limit db bulkLoad rate
  1136. collection:
  1137. max: -1 # MB/s, default no limit, not support yet. TODO: limit collection bulkLoad rate
  1138. partition:
  1139. max: -1 # MB/s, default no limit, not support yet. TODO: limit partition bulkLoad rate
  1140. dql:
  1141. enabled: false # Whether DQL request throttling is enabled.
  1142. searchRate:
  1143. # Maximum number of vectors to search per second.
  1144. # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second no matter whether these 100 vectors are all in one search or scattered across multiple searches.
  1145. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  1146. max: -1
  1147. db:
  1148. max: -1 # vps (vectors per second), default no limit
  1149. collection:
  1150. # Maximum number of vectors to search per collection per second.
  1151. # Setting this item to 100 indicates that Milvus only allows searching 100 vectors per second per collection no matter whether these 100 vectors are all in one search or scattered across multiple searches.
  1152. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  1153. max: -1
  1154. partition:
  1155. max: -1 # vps (vectors per second), default no limit
  1156. queryRate:
  1157. # Maximum number of queries per second.
  1158. # Setting this item to 100 indicates that Milvus only allows 100 queries per second.
  1159. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  1160. max: -1
  1161. db:
  1162. max: -1 # qps, default no limit
  1163. collection:
  1164. # Maximum number of queries per collection per second.
  1165. # Setting this item to 100 indicates that Milvus only allows 100 queries per collection per second.
  1166. # To use this setting, set quotaAndLimits.dql.enabled to true at the same time.
  1167. max: -1
  1168. partition:
  1169. max: -1 # qps, default no limit
  1170. limitWriting:
  1171. # forceDeny false means dml requests are allowed (except for some
  1172. # specific conditions, such as node memory reaching the water mark), true means always reject all dml requests.
  1173. forceDeny: false
  1174. ttProtection:
  1175. enabled: false
  1176. # maxTimeTickDelay indicates the backpressure for DML Operations.
  1177. # DML rates would be reduced according to the ratio of time tick delay to maxTimeTickDelay,
  1178. # if time tick delay is greater than maxTimeTickDelay, all DML requests would be rejected.
  1179. # seconds
  1180. maxTimeTickDelay: 1200
  1181. memProtection:
  1182. # When memory usage > memoryHighWaterLevel, all dml requests would be rejected;
  1183. # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate;
  1184. # When memory usage < memoryLowWaterLevel, no action.
  1185. enabled: true
  1186. dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes
  1187. dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes
  1188. queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes
  1189. queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes
  1190. growingSegmentsSizeProtection:
  1191. # No action will be taken if the growing segments size is less than the low watermark.
  1192. # When the growing segments size exceeds the low watermark, the dml rate will be reduced,
  1193. # but the rate will not be lower than minRateRatio * dmlRate.
  1194. enabled: false
  1195. minRateRatio: 0.5
  1196. lowWaterLevel: 0.2
  1197. highWaterLevel: 0.4
  1198. diskProtection:
  1199. enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
  1200. diskQuota: -1 # MB, (0, +inf), default no limit
  1201. loadedDiskQuota: -1 # MB, (0, +inf), default no limit
  1202. diskQuotaPerDB: -1 # MB, (0, +inf), default no limit
  1203. diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit
  1204. diskQuotaPerPartition: -1 # MB, (0, +inf), default no limit
  1205. l0SegmentsRowCountProtection:
  1206. enabled: false # switch to enable l0 segment row count quota
  1207. lowWaterLevel: 30000000 # l0 segment row count quota, low water level
  1208. highWaterLevel: 50000000 # l0 segment row count quota, high water level
  1209. deleteBufferRowCountProtection:
  1210. enabled: false # switch to enable delete buffer row count quota
  1211. lowWaterLevel: 32768 # delete buffer row count quota, low water level
  1212. highWaterLevel: 65536 # delete buffer row count quota, high water level
  1213. deleteBufferSizeProtection:
  1214. enabled: false # switch to enable delete buffer size quota
  1215. lowWaterLevel: 134217728 # delete buffer size quota, low water level
  1216. highWaterLevel: 268435456 # delete buffer size quota, high water level
  1217. limitReading:
  1218. # forceDeny false means dql requests are allowed (except for some
  1219. # specific conditions, such as collection has been dropped), true means always reject all dql requests.
  1220. forceDeny: false
  1221. trace:
  1222. # trace exporter type, default is noop,
  1223. # optional values: ['noop','stdout', 'jaeger', 'otlp']
  1224. exporter: noop
  1225. # fraction of traceID based sampler,
  1226. # optional values: [0, 1]
  1227. # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
  1228. sampleFraction: 0
  1229. jaeger:
  1230. url: # when exporter is jaeger should set the jaeger's URL
  1231. otlp:
  1232. endpoint: # example: "127.0.0.1:4317" for grpc, "127.0.0.1:4318" for http
  1233. method: # otlp export method, acceptable values: ["grpc", "http"], using "grpc" by default
  1234. secure: true
  1235. headers: # otlp headers encoded in base64
  1236. initTimeoutSeconds: 10 # segcore initialization timeout in seconds, preventing otlp grpc from hanging forever
  1237. # When using GPU indexing, Milvus will utilize a memory pool to avoid frequent memory allocation and deallocation.
  1238. # Here, you can set the size of the memory occupied by the memory pool, with the unit being MB.
  1239. # Note that there is a possibility of Milvus crashing when the actual memory demand exceeds the value set by maxMemSize.
  1240. # If initMemSize and maxMemSize are both set to zero,
  1241. # Milvus will automatically initialize half of the available GPU memory,
  1242. # and maxMemSize will be the whole available GPU memory.
  1243. gpu:
  1244. initMemSize: 2048 # Gpu Memory Pool init size
  1245. maxMemSize: 4096 # Gpu Memory Pool Max size
  1246. overloadedMemoryThresholdPercentage: 95
  1247. # Any configuration related to the streaming node server.
  1248. streamingNode:
  1249. ip: # TCP/IP address of streamingNode. If not specified, use the first unicastable address
  1250. port: 22222 # TCP port of streamingNode
  1251. grpc:
  1252. serverMaxSendSize: 268435456 # The maximum size of each RPC request that the streamingNode can send, unit: byte
  1253. serverMaxRecvSize: 268435456 # The maximum size of each RPC request that the streamingNode can receive, unit: byte
  1254. clientMaxSendSize: 268435456 # The maximum size of each RPC request that the clients on streamingNode can send, unit: byte
  1255. clientMaxRecvSize: 268435456 # The maximum size of each RPC request that the clients on streamingNode can receive, unit: byte
  1256. # Any configuration related to the streaming service.
  1257. streaming:
  1258. walBalancer:
  1259. # The interval of balance task trigger at background, 1 min by default.
  1260. # It can be set as a duration string, such as 30s or 1m30s; see time.ParseDuration
  1261. triggerInterval: 1m
  1262. # The initial interval of balance task trigger backoff, 10 ms by default.
  1263. # It can be set as a duration string, such as 30s or 1m30s; see time.ParseDuration
  1264. backoffInitialInterval: 10ms
  1265. backoffMultiplier: 2 # The multiplier of balance task trigger backoff, 2 by default
  1266. # The max interval of balance task trigger backoff, 5s by default.
  1267. # It can be set as a duration string, such as 30s or 1m30s; see time.ParseDuration
  1268. backoffMaxInterval: 5s
  1269. # The timeout of wal balancer operation, 30s by default.
  1270. # If the operation exceeds this timeout, it will be canceled.
  1271. operationTimeout: 30s
  1272. balancePolicy:
  1273. name: vchannelFair # The name of the balance policy to use (vchannelFair is set here)
  1274. # Whether to allow rebalance, true by default.
  1275. # If the rebalance is not allowed, only the lost wal recovery will be executed, the rebalance (move a pchannel from one node to another node) will be skipped.
  1276. allowRebalance: true
  1277. minRebalanceIntervalThreshold: 5m # The minimum interval between rebalances for each wal, 5m by default.
  1278. # The threshold of recovery lag for rebalance, 1s by default.
  1279. # If the recovery lag is greater than this threshold, the rebalance of current pchannel is not allowed.
  1280. allowRebalanceRecoveryLagThreshold: 1s
  1281. vchannelFair:
  1282. # The weight of pchannel count in vchannelFair balance policy,
  1283. # the pchannel count will be more evenly distributed if the weight is greater, 0.4 by default
  1284. pchannelWeight: 0.4
  1285. # The weight of vchannel count in vchannelFair balance policy,
  1286. # the vchannel count will be more evenly distributed if the weight is greater, 0.3 by default
  1287. vchannelWeight: 0.3
  1288. # The weight of affinity in vchannelFair balance policy,
  1289. # the fewer VChannels belonging to the same Collection between two PChannels, the higher the affinity,
  1290. # the vchannels of one collection will be more evenly distributed if the weight is greater, 0.01 by default
  1291. antiAffinityWeight: 0.01
  1292. # The tolerance of vchannelFair balance policy, if the score difference of two balance results is less than the tolerance,
  1293. # the balance result will be ignored; the lower the tolerance, the more sensitive the rebalance, 0.01 by default
  1294. rebalanceTolerance: 0.01
  1295. # Indicates how many pchannels will be considered as a batch for rebalancing,
  1296. # the larger the step, the more aggressive and accurate the rebalance;
  1297. # it also determines the depth of the depth-first search that is used to find the best balance result, 3 by default
  1298. rebalanceMaxStep: 3
  1299. walBroadcaster:
  1300. concurrencyRatio: 4 # The concurrency ratio based on number of CPU for wal broadcaster, 4 by default.
  1301. txn:
  1302. defaultKeepaliveTimeout: 10s # The default keepalive timeout for wal txn, 10s by default
  1303. walWriteAheadBuffer:
  1304. capacity: 64m # The capacity of write ahead buffer of each wal, 64M by default
  1305. keepalive: 30s # The keepalive duration for entries in write ahead buffer of each wal, 30s by default
  1306. walReadAheadBuffer:
  1307. # The buffer length (pending message count) of read ahead buffer of each wal scanner can be used, 128 by default.
  1308. # Higher one will increase the throughput of wal message handling, but introduce higher memory utilization.
  1309. # Use the underlying wal default value if 0 is given.
  1310. length: 128
  1311. logging:
  1312. # The threshold of slow log, 1s by default.
  1313. # If the wal implementation is woodpecker, the minimum threshold is 3s
  1314. appendSlowThreshold: 1s
  1315. flush:
  1316. # The threshold of memory usage for one streaming node,
  1317. # If the memory usage is higher than this threshold, the node will try to trigger flush action to decrease the total of growing segment until growingSegmentBytesLwmThreshold,
  1318. # the value should be in the range of (0, 1), 0.6 by default.
  1319. memoryThreshold: 0.6
  1320. # The high watermark of total growing segment bytes for one streaming node,
  1321. # If the total bytes of growing segment is greater than this threshold,
  1322. # a flush process will be triggered to decrease total bytes of growing segment until growingSegmentBytesLwmThreshold, 0.2 by default
  1323. growingSegmentBytesHwmThreshold: 0.2
  1324. # The lower watermark of total growing segment bytes for one streaming node,
  1325. # growing segment flush process will try to flush some growing segment into sealed
  1326. # until the total bytes of growing segment is less than this threshold, 0.1 by default.
  1327. growingSegmentBytesLwmThreshold: 0.1
  1328. l0:
  1329. # The max lifetime of l0 segment, 10 minutes by default.
  1330. # If the l0 segment is older than this time, it will be flushed.
  1331. maxLifetime: 10m
  1332. # The max row num of l0 segment, 500000 by default.
  1333. # If the row num of l0 segment is greater than this num, it will be flushed.
  1334. maxRowNum: 500000
  1335. # The max size of l0 segment, 32m by default.
  1336. # If the binary size of l0 segment is greater than this size, it will be flushed.
  1337. maxSize: 32m
  1338. walRecovery:
  1339. # The interval of persist recovery info, 10s by default.
  1340. # At every interval, the recovery info of wal will try to persist, and the checkpoint of wal can be advanced.
  1341. # Currently it only affects the recovery of wal, and does not affect the recovery of data flushed into object storage
  1342. persistInterval: 10s
  1343. # The max dirty message count of wal recovery, 100 by default.
  1344. # If there are more dirty messages than this count in the wal recovery info, it will be persisted immediately,
  1345. # without waiting for the persist interval.
  1346. maxDirtyMessage: 100
  1347. # The graceful close timeout for wal recovery, 3s by default.
  1348. # When the wal is closing, the recovery module will try to persist the recovery info for wal to make the next recovery operation faster.
  1349. # If that persist operation exceeds this timeout, the wal recovery module will close right now.
  1350. gracefulCloseTimeout: 3s
  1351. # Any configuration related to the knowhere vector search engine
  1352. knowhere:
  1353. enable: true # When this configuration is enabled, the index parameters defined below will be automatically populated as index parameters, without requiring user input.
  1354. AISAQ:
  1355. build:
  1356. max_degree: 56 # Maximum degree of the Vamana graph
  1357. pq_code_budget_gb_ratio: 0.125 # Size limit on the PQ code (compared with raw data)
  1358. search_list_size: 100 # Size of the candidate list during building graph
  1359. search:
  1360. beam_width_ratio: 4 # Ratio between the maximum number of IO requests per search iteration and CPU number
  1361. DISKANN:
  1362. build:
  1363. max_degree: 56 # Maximum degree of the Vamana graph
  1364. pq_code_budget_gb_ratio: 0.125 # Size limit on the PQ code (compared with raw data)
  1365. search_cache_budget_gb_ratio: 0.1 # Ratio of cached node numbers to raw data
  1366. search_list_size: 100 # Size of the candidate list during building graph
  1367. search:
  1368. beam_width_ratio: 4 # Ratio between the maximum number of IO requests per search iteration and CPU number
  1369. # credential configs, support apikey, AKSK, gcp credential
  1370. # examples:
  1371. # credential:
  1372. # your_apikey_credential_name:
  1373. # apikey: # Your apikey credential
  1374. # your_aksk_credential_name:
  1375. # access_key_id:
  1376. # secret_access_key:
  1377. # your_gcp_credential_name:
  1378. # credential_json:
  1379. credential:
  1380. aksk1:
  1381. access_key_id: # Your access_key_id
  1382. secret_access_key: # Your secret_access_key
  1383. apikey1:
  1384. apikey: # Your apikey credential
  1385. gcp1:
  1386. credential_json: # base64-encoded gcp credential data
  1387. # Any configuration related to functions
  1388. function:
  1389. textEmbedding:
  1390. providers:
  1391. azure_openai:
  1392. credential: # The name in the credential configuration item
  1393. enable: true # Whether to enable azure openai model service
  1394. resource_name: # Your azure openai resource name
  1395. url: # Your azure openai embedding url, Default is the official embedding url
  1396. bedrock:
  1397. credential: # The name in the credential configuration item
  1398. enable: true # Whether to enable bedrock model service
  1399. cohere:
  1400. credential: # The name in the credential configuration item
  1401. enable: true # Whether to enable cohere model service
  1402. url: # Your cohere embedding url, Default is the official embedding url
  1403. dashscope:
  1404. credential: # The name in the credential configuration item
  1405. enable: true # Whether to enable dashscope model service
  1406. url: # Your dashscope embedding url, Default is the official embedding url
  1407. openai:
  1408. credential: # The name in the credential configuration item
  1409. enable: true # Whether to enable openai model service
  1410. url: # Your openai embedding url, Default is the official embedding url
  1411. siliconflow:
  1412. credential: # The name in the credential configuration item
  1413. enable: true # Whether to enable siliconflow model service
  1414. url: # Your siliconflow embedding url, Default is the official embedding url
  1415. tei:
  1416. credential: # The name in the credential configuration item
  1417. enable: true # Whether to enable TEI model service
  1418. vertexai:
  1419. credential: # The name in the credential configuration item
  1420. enable: true # Whether to enable vertexai model service
  1421. url: # Your VertexAI embedding url
  1422. voyageai:
  1423. credential: # The name in the credential configuration item
  1424. enable: true # Whether to enable voyageai model service
  1425. url: # Your voyageai embedding url, Default is the official embedding url
  1426. rerank:
  1427. model:
  1428. providers:
  1429. cohere:
  1430. credential: # The name in the credential configuration item
  1431. enable: true # Whether to enable cohere model service
  1432. url: # Your cohere rerank url, Default is the official rerank url
  1433. siliconflow:
  1434. credential: # The name in the credential configuration item
  1435. enable: true # Whether to enable siliconflow model service
  1436. url: # Your siliconflow rerank url, Default is the official rerank url
  1437. tei:
  1438. credential: # The name in the credential configuration item
  1439. enable: true # Whether to enable TEI rerank service
  1440. vllm:
  1441. credential: # The name in the credential configuration item
  1442. enable: true # Whether to enable vllm rerank service
  1443. voyageai:
  1444. credential: # The name in the credential configuration item
  1445. enable: true # Whether to enable voyageai model service
  1446. url: # Your voyageai rerank url, Default is the official rerank url
  1447. analyzer:
  1448. local_resource_path: /var/lib/milvus/analyzer