/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java

http://github.com/apache/lucene-solr · Java · 2338 lines · 1574 code · 276 blank · 488 comment · 271 complexity · 271a2a5f06509d9c9e3589f165418389 MD5 · raw file

Large files are truncated click here to view the full file

  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package org.apache.solr.common.cloud;
  18. import java.lang.invoke.MethodHandles;
  19. import java.util.ArrayList;
  20. import java.util.Arrays;
  21. import java.util.Collection;
  22. import java.util.Collections;
  23. import java.util.EnumSet;
  24. import java.util.HashMap;
  25. import java.util.HashSet;
  26. import java.util.LinkedHashMap;
  27. import java.util.List;
  28. import java.util.Map;
  29. import java.util.Map.Entry;
  30. import java.util.Objects;
  31. import java.util.Set;
  32. import java.util.SortedSet;
  33. import java.util.TreeSet;
  34. import java.util.concurrent.ConcurrentHashMap;
  35. import java.util.concurrent.CountDownLatch;
  36. import java.util.concurrent.ExecutorService;
  37. import java.util.concurrent.Future;
  38. import java.util.concurrent.RejectedExecutionException;
  39. import java.util.concurrent.TimeUnit;
  40. import java.util.concurrent.TimeoutException;
  41. import java.util.concurrent.atomic.AtomicBoolean;
  42. import java.util.concurrent.atomic.AtomicReference;
  43. import java.util.function.Predicate;
  44. import java.util.function.UnaryOperator;
  45. import java.util.stream.Collectors;
  46. import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
  47. import org.apache.solr.common.AlreadyClosedException;
  48. import org.apache.solr.common.Callable;
  49. import org.apache.solr.common.SolrCloseable;
  50. import org.apache.solr.common.SolrException;
  51. import org.apache.solr.common.SolrException.ErrorCode;
  52. import org.apache.solr.common.params.AutoScalingParams;
  53. import org.apache.solr.common.params.CollectionAdminParams;
  54. import org.apache.solr.common.params.CoreAdminParams;
  55. import org.apache.solr.common.util.ExecutorUtil;
  56. import org.apache.solr.common.util.ObjectReleaseTracker;
  57. import org.apache.solr.common.util.Pair;
  58. import org.apache.solr.common.util.SolrNamedThreadFactory;
  59. import org.apache.solr.common.util.Utils;
  60. import org.apache.zookeeper.KeeperException;
  61. import org.apache.zookeeper.KeeperException.NoNodeException;
  62. import org.apache.zookeeper.WatchedEvent;
  63. import org.apache.zookeeper.Watcher;
  64. import org.apache.zookeeper.Watcher.Event.EventType;
  65. import org.apache.zookeeper.data.Stat;
  66. import org.slf4j.Logger;
  67. import org.slf4j.LoggerFactory;
  68. import static java.util.Collections.EMPTY_MAP;
  69. import static java.util.Collections.emptyMap;
  70. import static java.util.Collections.emptySet;
  71. import static java.util.Collections.emptySortedSet;
  72. import static org.apache.solr.common.util.Utils.fromJSON;
  73. public class ZkStateReader implements SolrCloseable {
  74. public static final int STATE_UPDATE_DELAY = Integer.getInteger("solr.OverseerStateUpdateDelay", 2000); // delay between cloud state updates
  75. private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  76. public static final String BASE_URL_PROP = "base_url";
  77. public static final String NODE_NAME_PROP = "node_name";
  78. public static final String CORE_NODE_NAME_PROP = "core_node_name";
  79. public static final String ROLES_PROP = "roles";
  80. public static final String STATE_PROP = "state";
  81. // If this flag is false and the replica does not exist in the cluster state, the set-state operation becomes a no-op (default is true).
  82. public static final String FORCE_SET_STATE_PROP = "force_set_state";
  83. /**
  84. * SolrCore name.
  85. */
  86. public static final String CORE_NAME_PROP = "core";
  87. public static final String COLLECTION_PROP = "collection";
  88. public static final String ELECTION_NODE_PROP = "election_node";
  89. public static final String SHARD_ID_PROP = "shard";
  90. public static final String REPLICA_PROP = "replica";
  91. public static final String SHARD_RANGE_PROP = "shard_range";
  92. public static final String SHARD_STATE_PROP = "shard_state";
  93. public static final String SHARD_PARENT_PROP = "shard_parent";
  94. public static final String NUM_SHARDS_PROP = "numShards";
  95. public static final String LEADER_PROP = "leader";
  96. public static final String SHARED_STORAGE_PROP = "shared_storage";
  97. public static final String PROPERTY_PROP = "property";
  98. public static final String PROPERTY_PROP_PREFIX = "property.";
  99. public static final String PROPERTY_VALUE_PROP = "property.value";
  100. public static final String MAX_AT_ONCE_PROP = "maxAtOnce";
  101. public static final String MAX_WAIT_SECONDS_PROP = "maxWaitSeconds";
  102. public static final String STATE_TIMESTAMP_PROP = "stateTimestamp";
  103. public static final String COLLECTIONS_ZKNODE = "/collections";
  104. public static final String LIVE_NODES_ZKNODE = "/live_nodes";
  105. public static final String ALIASES = "/aliases.json";
  106. public static final String CLUSTER_STATE = "/clusterstate.json";
  107. public static final String CLUSTER_PROPS = "/clusterprops.json";
  108. public static final String COLLECTION_PROPS_ZKNODE = "collectionprops.json";
  109. public static final String REJOIN_AT_HEAD_PROP = "rejoinAtHead";
  110. public static final String SOLR_SECURITY_CONF_PATH = "/security.json";
  111. public static final String SOLR_AUTOSCALING_CONF_PATH = "/autoscaling.json";
  112. public static final String SOLR_AUTOSCALING_EVENTS_PATH = "/autoscaling/events";
  113. public static final String SOLR_AUTOSCALING_TRIGGER_STATE_PATH = "/autoscaling/triggerState";
  114. public static final String SOLR_AUTOSCALING_NODE_ADDED_PATH = "/autoscaling/nodeAdded";
  115. public static final String SOLR_AUTOSCALING_NODE_LOST_PATH = "/autoscaling/nodeLost";
  116. public static final String SOLR_PKGS_PATH = "/packages.json";
  117. public static final String DEFAULT_SHARD_PREFERENCES = "defaultShardPreferences";
  118. public static final String REPLICATION_FACTOR = "replicationFactor";
  119. public static final String MAX_SHARDS_PER_NODE = "maxShardsPerNode";
  120. public static final String AUTO_ADD_REPLICAS = "autoAddReplicas";
  121. public static final String MAX_CORES_PER_NODE = "maxCoresPerNode";
  122. public static final String PULL_REPLICAS = "pullReplicas";
  123. public static final String NRT_REPLICAS = "nrtReplicas";
  124. public static final String TLOG_REPLICAS = "tlogReplicas";
  125. public static final String READ_ONLY = "readOnly";
  126. public static final String ROLES = "/roles.json";
  127. public static final String CONFIGS_ZKNODE = "/configs";
  128. public final static String CONFIGNAME_PROP = "configName";
  129. public static final String LEGACY_CLOUD = "legacyCloud";
  130. public static final String SAMPLE_PERCENTAGE = "samplePercentage";
  131. /**
  132. * @deprecated use {@link org.apache.solr.common.params.CollectionAdminParams#DEFAULTS} instead.
  133. */
  134. @Deprecated
  135. public static final String COLLECTION_DEF = "collectionDefaults";
  136. public static final String URL_SCHEME = "urlScheme";
  137. private static final String SOLR_ENVIRONMENT = "environment";
  138. public static final String REPLICA_TYPE = "type";
  139. /**
  140. * A view of the current state of all collections; combines all the different state sources into a single view.
  141. */
  142. protected volatile ClusterState clusterState;
  143. private static final int GET_LEADER_RETRY_INTERVAL_MS = 50;
  144. private static final int GET_LEADER_RETRY_DEFAULT_TIMEOUT = Integer.parseInt(System.getProperty("zkReaderGetLeaderRetryTimeoutMs", "4000"));
  145. ;
  146. public static final String LEADER_ELECT_ZKNODE = "leader_elect";
  147. public static final String SHARD_LEADERS_ZKNODE = "leaders";
  148. public static final String ELECTION_NODE = "election";
  149. /**
  150. * Collections tracked in the legacy (shared) state format, reflects the contents of clusterstate.json.
  151. */
  152. private Map<String, ClusterState.CollectionRef> legacyCollectionStates = emptyMap();
  153. /**
  154. * Last seen ZK version of clusterstate.json.
  155. */
  156. private int legacyClusterStateVersion = 0;
  157. /**
  158. * Collections with format2 state.json, "interesting" and actively watched.
  159. */
  160. private final ConcurrentHashMap<String, DocCollection> watchedCollectionStates = new ConcurrentHashMap<>();
  161. /**
  162. * Collections with format2 state.json, not "interesting" and not actively watched.
  163. */
  164. private final ConcurrentHashMap<String, LazyCollectionRef> lazyCollectionStates = new ConcurrentHashMap<>();
  165. /**
  166. * Collection properties being actively watched
  167. */
  168. private final ConcurrentHashMap<String, VersionedCollectionProps> watchedCollectionProps = new ConcurrentHashMap<>();
  169. /**
  170. * Watchers ({@code PropsWatcher}) for collection properties.
  171. */
  172. private final ConcurrentHashMap<String, PropsWatcher> collectionPropsWatchers = new ConcurrentHashMap<>();
  173. private volatile SortedSet<String> liveNodes = emptySortedSet();
  174. private volatile Map<String, Object> clusterProperties = Collections.emptyMap();
  175. private final ZkConfigManager configManager;
  176. private ConfigData securityData;
  177. private final Runnable securityNodeListener;
  178. private ConcurrentHashMap<String, CollectionWatch<DocCollectionWatcher>> collectionWatches = new ConcurrentHashMap<>();
  179. // named this observers so there's less confusion between CollectionPropsWatcher map and the PropsWatcher map.
  180. private ConcurrentHashMap<String, CollectionWatch<CollectionPropsWatcher>> collectionPropsObservers = new ConcurrentHashMap<>();
  181. private Set<CloudCollectionsListener> cloudCollectionsListeners = ConcurrentHashMap.newKeySet();
  182. private final ExecutorService notifications = ExecutorUtil.newMDCAwareCachedThreadPool("watches");
  183. private Set<LiveNodesListener> liveNodesListeners = ConcurrentHashMap.newKeySet();
  184. private Set<ClusterPropertiesListener> clusterPropertiesListeners = ConcurrentHashMap.newKeySet();
  185. /**
  186. * Used to submit notifications to Collection Properties watchers in order
  187. **/
  188. private final ExecutorService collectionPropsNotifications = ExecutorUtil.newMDCAwareSingleThreadExecutor(new SolrNamedThreadFactory("collectionPropsNotifications"));
  189. private static final long LAZY_CACHE_TIME = TimeUnit.NANOSECONDS.convert(STATE_UPDATE_DELAY, TimeUnit.MILLISECONDS);
  190. private Future<?> collectionPropsCacheCleaner; // only kept to identify if the cleaner has already been started.
  191. /**
  192. * Get current {@link AutoScalingConfig}.
  193. *
  194. * @return current configuration from <code>autoscaling.json</code>. NOTE:
  195. * this data is retrieved from ZK on each call.
  196. */
  197. public AutoScalingConfig getAutoScalingConfig() throws KeeperException, InterruptedException {
  198. return getAutoScalingConfig(null);
  199. }
  200. /**
  201. * Get current {@link AutoScalingConfig}.
  202. *
  203. * @param watcher optional {@link Watcher} to set on a znode to watch for config changes.
  204. * @return current configuration from <code>autoscaling.json</code>. NOTE:
  205. * this data is retrieved from ZK on each call.
  206. */
  207. public AutoScalingConfig getAutoScalingConfig(Watcher watcher) throws KeeperException, InterruptedException {
  208. Stat stat = new Stat();
  209. Map<String, Object> map = new HashMap<>();
  210. try {
  211. byte[] bytes = zkClient.getData(SOLR_AUTOSCALING_CONF_PATH, watcher, stat, true);
  212. if (bytes != null && bytes.length > 0) {
  213. map = (Map<String, Object>) fromJSON(bytes);
  214. }
  215. } catch (KeeperException.NoNodeException e) {
  216. // ignore
  217. }
  218. map.put(AutoScalingParams.ZK_VERSION, stat.getVersion());
  219. return new AutoScalingConfig(map);
  220. }
  221. private static class CollectionWatch<T> {
  222. int coreRefCount = 0;
  223. Set<T> stateWatchers = ConcurrentHashMap.newKeySet();
  224. public boolean canBeRemoved() {
  225. return coreRefCount + stateWatchers.size() == 0;
  226. }
  227. }
  228. public static final Set<String> KNOWN_CLUSTER_PROPS = Set.of(
  229. LEGACY_CLOUD,
  230. URL_SCHEME,
  231. AUTO_ADD_REPLICAS,
  232. CoreAdminParams.BACKUP_LOCATION,
  233. DEFAULT_SHARD_PREFERENCES,
  234. MAX_CORES_PER_NODE,
  235. SAMPLE_PERCENTAGE,
  236. SOLR_ENVIRONMENT,
  237. CollectionAdminParams.DEFAULTS);
  238. /**
  239. * Returns config set name for collection.
  240. * TODO move to DocCollection (state.json).
  241. *
  242. * @param collection to return config set name for
  243. */
  244. public String readConfigName(String collection) throws KeeperException {
  245. String configName = null;
  246. String path = COLLECTIONS_ZKNODE + "/" + collection;
  247. log.debug("Loading collection config from: [{}]", path);
  248. try {
  249. byte[] data = zkClient.getData(path, null, null, true);
  250. if (data == null) {
  251. log.warn("No config data found at path {}.", path);
  252. throw new KeeperException.NoNodeException("No config data found at path: " + path);
  253. }
  254. ZkNodeProps props = ZkNodeProps.load(data);
  255. configName = props.getStr(CONFIGNAME_PROP);
  256. if (configName == null) {
  257. log.warn("No config data found at path{}. ", path);
  258. throw new KeeperException.NoNodeException("No config data found at path: " + path);
  259. }
  260. } catch (InterruptedException e) {
  261. SolrZkClient.checkInterrupted(e);
  262. log.warn("Thread interrupted when loading config name for collection {}", collection);
  263. throw new SolrException(ErrorCode.SERVER_ERROR, "Thread interrupted when loading config name for collection " + collection, e);
  264. }
  265. return configName;
  266. }
  267. private final SolrZkClient zkClient;
  268. private final boolean closeClient;
  269. private volatile boolean closed = false;
  270. private Set<CountDownLatch> waitLatches = ConcurrentHashMap.newKeySet();
  /**
   * Creates a reader on top of an existing client, without a security node listener.
   *
   * @param zkClient the client used for all ZK access
   */
  public ZkStateReader(SolrZkClient zkClient) {
    this(zkClient, (Runnable) null);
  }
  274. public ZkStateReader(SolrZkClient zkClient, Runnable securityNodeListener) {
  275. this.zkClient = zkClient;
  276. this.configManager = new ZkConfigManager(zkClient);
  277. this.closeClient = false;
  278. this.securityNodeListener = securityNodeListener;
  279. assert ObjectReleaseTracker.track(this);
  280. }
  281. public ZkStateReader(String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout) {
  282. this.zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, zkClientConnectTimeout,
  283. // on reconnect, reload cloud info
  284. new OnReconnect() {
  285. @Override
  286. public void command() {
  287. try {
  288. ZkStateReader.this.createClusterStateWatchersAndUpdate();
  289. } catch (KeeperException e) {
  290. log.error("A ZK error has occurred", e);
  291. throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "A ZK error has occurred", e);
  292. } catch (InterruptedException e) {
  293. // Restore the interrupted status
  294. Thread.currentThread().interrupt();
  295. log.error("Interrupted", e);
  296. throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Interrupted", e);
  297. }
  298. }
  299. });
  300. this.configManager = new ZkConfigManager(zkClient);
  301. this.closeClient = true;
  302. this.securityNodeListener = null;
  303. assert ObjectReleaseTracker.track(this);
  304. }
  305. public ZkConfigManager getConfigManager() {
  306. return configManager;
  307. }
  308. /**
  309. * Forcibly refresh cluster state from ZK. Do this only to avoid race conditions because it's expensive.
  310. * <p>
  311. * It is cheaper to call {@link #forceUpdateCollection(String)} on a single collection if you must.
  312. *
  313. * @lucene.internal
  314. */
  315. public void forciblyRefreshAllClusterStateSlow() throws KeeperException, InterruptedException {
  316. synchronized (getUpdateLock()) {
  317. if (clusterState == null) {
  318. // Never initialized, just run normal initialization.
  319. createClusterStateWatchersAndUpdate();
  320. return;
  321. }
  322. // No need to set watchers because we should already have watchers registered for everything.
  323. refreshCollectionList(null);
  324. refreshLiveNodes(null);
  325. refreshLegacyClusterState(null);
  326. // Need a copy so we don't delete from what we're iterating over.
  327. Collection<String> safeCopy = new ArrayList<>(watchedCollectionStates.keySet());
  328. Set<String> updatedCollections = new HashSet<>();
  329. for (String coll : safeCopy) {
  330. DocCollection newState = fetchCollectionState(coll, null);
  331. if (updateWatchedCollection(coll, newState)) {
  332. updatedCollections.add(coll);
  333. }
  334. }
  335. constructState(updatedCollections);
  336. }
  337. }
  338. /**
  339. * Forcibly refresh a collection's internal state from ZK. Try to avoid having to resort to this when
  340. * a better design is possible.
  341. */
  342. //TODO shouldn't we call ZooKeeper.sync() at the right places to prevent reading a stale value? We do so for aliases.
  343. public void forceUpdateCollection(String collection) throws KeeperException, InterruptedException {
  344. synchronized (getUpdateLock()) {
  345. if (clusterState == null) {
  346. log.warn("ClusterState watchers have not been initialized");
  347. return;
  348. }
  349. ClusterState.CollectionRef ref = clusterState.getCollectionRef(collection);
  350. if (ref == null || legacyCollectionStates.containsKey(collection)) {
  351. // We either don't know anything about this collection (maybe it's new?) or it's legacy.
  352. // First update the legacy cluster state.
  353. log.debug("Checking legacy cluster state for collection {}", collection);
  354. refreshLegacyClusterState(null);
  355. if (!legacyCollectionStates.containsKey(collection)) {
  356. // No dice, see if a new collection just got created.
  357. LazyCollectionRef tryLazyCollection = new LazyCollectionRef(collection);
  358. if (tryLazyCollection.get() != null) {
  359. // What do you know, it exists!
  360. log.debug("Adding lazily-loaded reference for collection {}", collection);
  361. lazyCollectionStates.putIfAbsent(collection, tryLazyCollection);
  362. constructState(Collections.singleton(collection));
  363. }
  364. }
  365. } else if (ref.isLazilyLoaded()) {
  366. log.debug("Refreshing lazily-loaded state for collection {}", collection);
  367. if (ref.get() != null) {
  368. return;
  369. }
  370. // Edge case: if there's no external collection, try refreshing legacy cluster state in case it's there.
  371. refreshLegacyClusterState(null);
  372. } else if (watchedCollectionStates.containsKey(collection)) {
  373. // Exists as a watched collection, force a refresh.
  374. log.debug("Forcing refresh of watched collection state for {}", collection);
  375. DocCollection newState = fetchCollectionState(collection, null);
  376. if (updateWatchedCollection(collection, newState)) {
  377. constructState(Collections.singleton(collection));
  378. }
  379. } else {
  380. log.error("Collection {} is not lazy or watched!", collection);
  381. }
  382. }
  383. }
  384. /**
  385. * Refresh the set of live nodes.
  386. */
  387. public void updateLiveNodes() throws KeeperException, InterruptedException {
  388. refreshLiveNodes(null);
  389. }
  390. public Integer compareStateVersions(String coll, int version) {
  391. DocCollection collection = clusterState.getCollectionOrNull(coll);
  392. if (collection == null) return null;
  393. if (collection.getZNodeVersion() < version) {
  394. if (log.isDebugEnabled()) {
  395. log.debug("Server older than client {}<{}", collection.getZNodeVersion(), version);
  396. }
  397. DocCollection nu = getCollectionLive(this, coll);
  398. if (nu == null) return -1;
  399. if (nu.getZNodeVersion() > collection.getZNodeVersion()) {
  400. if (updateWatchedCollection(coll, nu)) {
  401. synchronized (getUpdateLock()) {
  402. constructState(Collections.singleton(coll));
  403. }
  404. }
  405. collection = nu;
  406. }
  407. }
  408. if (collection.getZNodeVersion() == version) {
  409. return null;
  410. }
  411. if (log.isDebugEnabled()) {
  412. log.debug("Wrong version from client [{}]!=[{}]", version, collection.getZNodeVersion());
  413. }
  414. return collection.getZNodeVersion();
  415. }
  416. public synchronized void createClusterStateWatchersAndUpdate() throws KeeperException,
  417. InterruptedException {
  418. // We need to fetch the current cluster state and the set of live nodes
  419. log.debug("Updating cluster state from ZooKeeper... ");
  420. // Sanity check ZK structure.
  421. if (!zkClient.exists(CLUSTER_STATE, true)) {
  422. throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
  423. "Cannot connect to cluster at " + zkClient.getZkServerAddress() + ": cluster not found/not ready");
  424. }
  425. // on reconnect of SolrZkClient force refresh and re-add watches.
  426. loadClusterProperties();
  427. refreshLiveNodes(new LiveNodeWatcher());
  428. refreshLegacyClusterState(new LegacyClusterStateWatcher());
  429. refreshStateFormat2Collections();
  430. refreshCollectionList(new CollectionsChildWatcher());
  431. refreshAliases(aliasesManager);
  432. if (securityNodeListener != null) {
  433. addSecurityNodeWatcher(pair -> {
  434. ConfigData cd = new ConfigData();
  435. cd.data = pair.first() == null || pair.first().length == 0 ? EMPTY_MAP : Utils.getDeepCopy((Map) fromJSON(pair.first()), 4, false);
  436. cd.version = pair.second() == null ? -1 : pair.second().getVersion();
  437. securityData = cd;
  438. securityNodeListener.run();
  439. });
  440. securityData = getSecurityProps(true);
  441. }
  442. collectionPropsObservers.forEach((k, v) -> {
  443. collectionPropsWatchers.computeIfAbsent(k, PropsWatcher::new).refreshAndWatch(true);
  444. });
  445. }
  446. private void addSecurityNodeWatcher(final Callable<Pair<byte[], Stat>> callback)
  447. throws KeeperException, InterruptedException {
  448. zkClient.exists(SOLR_SECURITY_CONF_PATH,
  449. new Watcher() {
  450. @Override
  451. public void process(WatchedEvent event) {
  452. // session events are not change events, and do not remove the watcher
  453. if (EventType.None.equals(event.getType())) {
  454. return;
  455. }
  456. try {
  457. synchronized (ZkStateReader.this.getUpdateLock()) {
  458. log.debug("Updating [{}] ... ", SOLR_SECURITY_CONF_PATH);
  459. // remake watch
  460. final Watcher thisWatch = this;
  461. final Stat stat = new Stat();
  462. final byte[] data = getZkClient().getData(SOLR_SECURITY_CONF_PATH, thisWatch, stat, true);
  463. try {
  464. callback.call(new Pair<>(data, stat));
  465. } catch (Exception e) {
  466. log.error("Error running collections node listener", e);
  467. }
  468. }
  469. } catch (KeeperException.ConnectionLossException | KeeperException.SessionExpiredException e) {
  470. log.warn("ZooKeeper watch triggered, but Solr cannot talk to ZK: [{}]", e.getMessage());
  471. } catch (KeeperException e) {
  472. log.error("A ZK error has occurred", e);
  473. throw new ZooKeeperException(ErrorCode.SERVER_ERROR, "", e);
  474. } catch (InterruptedException e) {
  475. // Restore the interrupted status
  476. Thread.currentThread().interrupt();
  477. log.warn("Interrupted", e);
  478. }
  479. }
  480. }, true);
  481. }
  482. /**
  483. * Construct the total state view from all sources.
  484. * Must hold {@link #getUpdateLock()} before calling this.
  485. *
  486. * @param changedCollections collections that have changed since the last call,
  487. * and that should fire notifications
  488. */
  489. private void constructState(Set<String> changedCollections) {
  490. Set<String> liveNodes = this.liveNodes; // volatile read
  491. // Legacy clusterstate is authoritative, for backwards compatibility.
  492. // To move a collection's state to format2, first create the new state2 format node, then remove legacy entry.
  493. Map<String, ClusterState.CollectionRef> result = new LinkedHashMap<>(legacyCollectionStates);
  494. // Add state format2 collections, but don't override legacy collection states.
  495. for (Map.Entry<String, DocCollection> entry : watchedCollectionStates.entrySet()) {
  496. result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue()));
  497. }
  498. // Finally, add any lazy collections that aren't already accounted for.
  499. for (Map.Entry<String, LazyCollectionRef> entry : lazyCollectionStates.entrySet()) {
  500. result.putIfAbsent(entry.getKey(), entry.getValue());
  501. }
  502. this.clusterState = new ClusterState(liveNodes, result, legacyClusterStateVersion);
  503. if (log.isDebugEnabled()) {
  504. log.debug("clusterStateSet: legacy [{}] interesting [{}] watched [{}] lazy [{}] total [{}]",
  505. legacyCollectionStates.keySet().size(),
  506. collectionWatches.keySet().size(),
  507. watchedCollectionStates.keySet().size(),
  508. lazyCollectionStates.keySet().size(),
  509. clusterState.getCollectionStates().size());
  510. }
  511. if (log.isTraceEnabled()) {
  512. log.trace("clusterStateSet: legacy [{}] interesting [{}] watched [{}] lazy [{}] total [{}]",
  513. legacyCollectionStates.keySet(),
  514. collectionWatches.keySet(),
  515. watchedCollectionStates.keySet(),
  516. lazyCollectionStates.keySet(),
  517. clusterState.getCollectionStates());
  518. }
  519. notifyCloudCollectionsListeners();
  520. for (String collection : changedCollections) {
  521. notifyStateWatchers(collection, clusterState.getCollectionOrNull(collection));
  522. }
  523. }
  524. /**
  525. * Refresh legacy (shared) clusterstate.json
  526. */
  527. private void refreshLegacyClusterState(Watcher watcher) throws KeeperException, InterruptedException {
  528. try {
  529. final Stat stat = new Stat();
  530. final byte[] data = zkClient.getData(CLUSTER_STATE, watcher, stat, true);
  531. final ClusterState loadedData = ClusterState.load(stat.getVersion(), data, emptySet(), CLUSTER_STATE);
  532. synchronized (getUpdateLock()) {
  533. if (this.legacyClusterStateVersion >= stat.getVersion()) {
  534. // Nothing to do, someone else updated same or newer.
  535. return;
  536. }
  537. Set<String> updatedCollections = new HashSet<>();
  538. for (String coll : this.collectionWatches.keySet()) {
  539. ClusterState.CollectionRef ref = this.legacyCollectionStates.get(coll);
  540. // legacy collections are always in-memory
  541. DocCollection oldState = ref == null ? null : ref.get();
  542. ClusterState.CollectionRef newRef = loadedData.getCollectionStates().get(coll);
  543. DocCollection newState = newRef == null ? null : newRef.get();
  544. if (newState == null) {
  545. // check that we haven't just migrated
  546. newState = watchedCollectionStates.get(coll);
  547. }
  548. if (!Objects.equals(oldState, newState)) {
  549. updatedCollections.add(coll);
  550. }
  551. }
  552. this.legacyCollectionStates = loadedData.getCollectionStates();
  553. this.legacyClusterStateVersion = stat.getVersion();
  554. constructState(updatedCollections);
  555. }
  556. } catch (KeeperException.NoNodeException e) {
  557. // Ignore missing legacy clusterstate.json.
  558. synchronized (getUpdateLock()) {
  559. this.legacyCollectionStates = emptyMap();
  560. this.legacyClusterStateVersion = 0;
  561. constructState(Collections.emptySet());
  562. }
  563. }
  564. }
  565. /**
  566. * Refresh state format2 collections.
  567. */
  568. private void refreshStateFormat2Collections() {
  569. for (String coll : collectionWatches.keySet()) {
  570. new StateWatcher(coll).refreshAndWatch();
  571. }
  572. }
  573. // We don't get a Stat or track versions on getChildren() calls, so force linearization.
  574. private final Object refreshCollectionListLock = new Object();
  575. /**
  576. * Search for any lazy-loadable state format2 collections.
  577. * <p>
  578. * A stateFormat=1 collection which is not interesting to us can also
  579. * be put into the {@link #lazyCollectionStates} map here. But that is okay
  580. * because {@link #constructState(Set)} will give priority to collections in the
  581. * shared collection state over this map.
  582. * In fact this is a clever way to avoid doing a ZK exists check on
  583. * the /collections/collection_name/state.json znode
  584. * Such an exists check is done in {@link ClusterState#hasCollection(String)} and
  585. * {@link ClusterState#getCollectionsMap()} methods
  586. * have a safeguard against exposing wrong collection names to the users
  587. */
  588. private void refreshCollectionList(Watcher watcher) throws KeeperException, InterruptedException {
  589. synchronized (refreshCollectionListLock) {
  590. List<String> children = null;
  591. try {
  592. children = zkClient.getChildren(COLLECTIONS_ZKNODE, watcher, true);
  593. } catch (KeeperException.NoNodeException e) {
  594. log.warn("Error fetching collection names: [{}]", e.getMessage());
  595. // fall through
  596. }
  597. if (children == null || children.isEmpty()) {
  598. lazyCollectionStates.clear();
  599. return;
  600. }
  601. // Don't lock getUpdateLock() here, we don't need it and it would cause deadlock.
  602. // Don't mess with watchedCollections, they should self-manage.
  603. // First, drop any children that disappeared.
  604. this.lazyCollectionStates.keySet().retainAll(children);
  605. for (String coll : children) {
  606. // We will create an eager collection for any interesting collections, so don't add to lazy.
  607. if (!collectionWatches.containsKey(coll)) {
  608. // Double check contains just to avoid allocating an object.
  609. LazyCollectionRef existing = lazyCollectionStates.get(coll);
  610. if (existing == null) {
  611. lazyCollectionStates.putIfAbsent(coll, new LazyCollectionRef(coll));
  612. }
  613. }
  614. }
  615. }
  616. }
  617. // We don't get a Stat or track versions on getChildren() calls, so force linearization.
  618. private final Object refreshCollectionsSetLock = new Object();
  619. // Ensures that only the latest getChildren fetch gets applied.
  620. private final AtomicReference<Set<String>> lastFetchedCollectionSet = new AtomicReference<>();
  621. /**
  622. * Register a CloudCollectionsListener to be called when the set of collections within a cloud changes.
  623. */
  624. public void registerCloudCollectionsListener(CloudCollectionsListener cloudCollectionsListener) {
  625. cloudCollectionsListeners.add(cloudCollectionsListener);
  626. notifyNewCloudCollectionsListener(cloudCollectionsListener);
  627. }
  628. /**
  629. * Remove a registered CloudCollectionsListener.
  630. */
  631. public void removeCloudCollectionsListener(CloudCollectionsListener cloudCollectionsListener) {
  632. cloudCollectionsListeners.remove(cloudCollectionsListener);
  633. }
  634. private void notifyNewCloudCollectionsListener(CloudCollectionsListener listener) {
  635. listener.onChange(Collections.emptySet(), lastFetchedCollectionSet.get());
  636. }
  637. private void notifyCloudCollectionsListeners() {
  638. notifyCloudCollectionsListeners(false);
  639. }
  640. private void notifyCloudCollectionsListeners(boolean notifyIfSame) {
  641. synchronized (refreshCollectionsSetLock) {
  642. final Set<String> newCollections = getCurrentCollections();
  643. final Set<String> oldCollections = lastFetchedCollectionSet.getAndSet(newCollections);
  644. if (!newCollections.equals(oldCollections) || notifyIfSame) {
  645. cloudCollectionsListeners.forEach(listener -> listener.onChange(oldCollections, newCollections));
  646. }
  647. }
  648. }
  649. private Set<String> getCurrentCollections() {
  650. Set<String> collections = new HashSet<>();
  651. collections.addAll(legacyCollectionStates.keySet());
  652. collections.addAll(watchedCollectionStates.keySet());
  653. collections.addAll(lazyCollectionStates.keySet());
  654. return collections;
  655. }
  656. private class LazyCollectionRef extends ClusterState.CollectionRef {
  657. private final String collName;
  658. private long lastUpdateTime;
  659. private DocCollection cachedDocCollection;
  660. public LazyCollectionRef(String collName) {
  661. super(null);
  662. this.collName = collName;
  663. this.lastUpdateTime = -1;
  664. }
  665. @Override
  666. public synchronized DocCollection get(boolean allowCached) {
  667. gets.incrementAndGet();
  668. if (!allowCached || lastUpdateTime < 0 || System.nanoTime() - lastUpdateTime > LAZY_CACHE_TIME) {
  669. boolean shouldFetch = true;
  670. if (cachedDocCollection != null) {
  671. Stat exists = null;
  672. try {
  673. exists = zkClient.exists(getCollectionPath(collName), null, true);
  674. } catch (Exception e) {
  675. }
  676. if (exists != null && exists.getVersion() == cachedDocCollection.getZNodeVersion()) {
  677. shouldFetch = false;
  678. }
  679. }
  680. if (shouldFetch) {
  681. cachedDocCollection = getCollectionLive(ZkStateReader.this, collName);
  682. lastUpdateTime = System.nanoTime();
  683. }
  684. }
  685. return cachedDocCollection;
  686. }
  687. @Override
  688. public boolean isLazilyLoaded() {
  689. return true;
  690. }
  691. @Override
  692. public String toString() {
  693. return "LazyCollectionRef(" + collName + ")";
  694. }
  695. }
  696. // We don't get a Stat or track versions on getChildren() calls, so force linearization.
  697. private final Object refreshLiveNodesLock = new Object();
  698. // Ensures that only the latest getChildren fetch gets applied.
  699. private final AtomicReference<SortedSet<String>> lastFetchedLiveNodes = new AtomicReference<>();
  700. /**
  701. * Refresh live_nodes.
  702. */
  703. private void refreshLiveNodes(Watcher watcher) throws KeeperException, InterruptedException {
  704. synchronized (refreshLiveNodesLock) {
  705. SortedSet<String> newLiveNodes;
  706. try {
  707. List<String> nodeList = zkClient.getChildren(LIVE_NODES_ZKNODE, watcher, true);
  708. newLiveNodes = new TreeSet<>(nodeList);
  709. } catch (KeeperException.NoNodeException e) {
  710. newLiveNodes = emptySortedSet();
  711. }
  712. lastFetchedLiveNodes.set(newLiveNodes);
  713. }
  714. // Can't lock getUpdateLock() until we release the other, it would cause deadlock.
  715. SortedSet<String> oldLiveNodes, newLiveNodes;
  716. synchronized (getUpdateLock()) {
  717. newLiveNodes = lastFetchedLiveNodes.getAndSet(null);
  718. if (newLiveNodes == null) {
  719. // Someone else won the race to apply the last update, just exit.
  720. return;
  721. }
  722. oldLiveNodes = this.liveNodes;
  723. this.liveNodes = newLiveNodes;
  724. if (clusterState != null) {
  725. clusterState.setLiveNodes(newLiveNodes);
  726. }
  727. }
  728. if (oldLiveNodes.size() != newLiveNodes.size()) {
  729. if (log.isInfoEnabled()) {
  730. log.info("Updated live nodes from ZooKeeper... ({}) -> ({})", oldLiveNodes.size(), newLiveNodes.size());
  731. }
  732. }
  733. if (log.isDebugEnabled()) {
  734. log.debug("Updated live nodes from ZooKeeper... {} -> {}", oldLiveNodes, newLiveNodes);
  735. }
  736. if (!oldLiveNodes.equals(newLiveNodes)) { // fire listeners
  737. liveNodesListeners.forEach(listener -> {
  738. if (listener.onChange(new TreeSet<>(oldLiveNodes), new TreeSet<>(newLiveNodes))) {
  739. removeLiveNodesListener(listener);
  740. }
  741. });
  742. }
  743. }
  744. public void registerClusterPropertiesListener(ClusterPropertiesListener listener) {
  745. // fire it once with current properties
  746. if (listener.onChange(getClusterProperties())) {
  747. removeClusterPropertiesListener(listener);
  748. } else {
  749. clusterPropertiesListeners.add(listener);
  750. }
  751. }
  752. public void removeClusterPropertiesListener(ClusterPropertiesListener listener) {
  753. clusterPropertiesListeners.remove(listener);
  754. }
  755. public void registerLiveNodesListener(LiveNodesListener listener) {
  756. // fire it once with current live nodes
  757. if (listener.onChange(new TreeSet<>(getClusterState().getLiveNodes()), new TreeSet<>(getClusterState().getLiveNodes()))) {
  758. removeLiveNodesListener(listener);
  759. }
  760. liveNodesListeners.add(listener);
  761. }
  762. public void removeLiveNodesListener(LiveNodesListener listener) {
  763. liveNodesListeners.remove(listener);
  764. }
  765. /**
  766. * @return information about the cluster from ZooKeeper
  767. */
  768. public ClusterState getClusterState() {
  769. return clusterState;
  770. }
  771. public Object getUpdateLock() {
  772. return this;
  773. }
  774. public void close() {
  775. this.closed = true;
  776. notifications.shutdownNow();
  777. waitLatches.parallelStream().forEach(c -> {
  778. c.countDown();
  779. });
  780. ExecutorUtil.shutdownAndAwaitTermination(notifications);
  781. ExecutorUtil.shutdownAndAwaitTermination(collectionPropsNotifications);
  782. if (closeClient) {
  783. zkClient.close();
  784. }
  785. assert ObjectReleaseTracker.release(this);
  786. }
  787. @Override
  788. public boolean isClosed() {
  789. return closed;
  790. }
  791. public String getLeaderUrl(String collection, String shard, int timeout) throws InterruptedException {
  792. ZkCoreNodeProps props = new ZkCoreNodeProps(getLeaderRetry(collection, shard, timeout));
  793. return props.getCoreUrl();
  794. }
  795. public Replica getLeader(Set<String> liveNodes, DocCollection docCollection, String shard) {
  796. Replica replica = docCollection != null ? docCollection.getLeader(shard) : null;
  797. if (replica != null && liveNodes.contains(replica.getNodeName())) {
  798. return replica;
  799. }
  800. return null;
  801. }
  802. public Replica getLeader(String collection, String shard) {
  803. if (clusterState != null) {
  804. DocCollection docCollection = clusterState.getCollectionOrNull(collection);
  805. Replica replica = docCollection != null ? docCollection.getLeader(shard) : null;
  806. if (replica != null && getClusterState().liveNodesContain(replica.getNodeName())) {
  807. return replica;
  808. }
  809. }
  810. return null;
  811. }
  812. public boolean isNodeLive(String node) {
  813. return liveNodes.contains(node);
  814. }
  815. /**
  816. * Get shard leader properties, with retry if none exist.
  817. */
  818. public Replica getLeaderRetry(String collection, String shard) throws InterruptedException {
  819. return getLeaderRetry(collection, shard, GET_LEADER_RETRY_DEFAULT_TIMEOUT);
  820. }
  821. /**
  822. * Get shard leader properties, with retry if none exist.
  823. */
  824. public Replica getLeaderRetry(String collection, String shard, int timeout) throws InterruptedException {
  825. AtomicReference<Replica> leader = new AtomicReference<>();
  826. try {
  827. waitForState(collection, timeout, TimeUnit.MILLISECONDS, (n, c) -> {
  828. if (c == null)
  829. return false;
  830. Replica l = getLeader(n, c, shard);
  831. if (l != null) {
  832. leader.set(l);
  833. return true;
  834. }
  835. return false;
  836. });
  837. } catch (TimeoutException e) {
  838. throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "No registered leader was found after waiting for "
  839. + timeout + "ms " + ", collection: " + collection + " slice: " + shard + " saw state=" + clusterState.getCollectionOrNull(collection)
  840. + " with live_nodes=" + clusterState.getLiveNodes());
  841. }
  842. return leader.get();
  843. }
  844. /**
  845. * Get path where shard leader properties live in zookeeper.
  846. */
  847. public static String getShardLeadersPath(String collection, String shardId) {
  848. return COLLECTIONS_ZKNODE + "/" + collection + "/"
  849. + SHARD_LEADERS_ZKNODE + (shardId != null ? ("/" + shardId)
  850. : "") + "/leader";
  851. }
  852. /**
  853. * Get path where shard leader elections ephemeral nodes are.
  854. */
  855. public static String getShardLeadersElectPath(String collection, String shardId) {
  856. return COLLECTIONS_ZKNODE + "/" + collection + "/"
  857. + LEADER_ELECT_ZKNODE + (shardId != null ? ("/" + shardId + "/" + ELECTION_NODE)
  858. : "");
  859. }
  860. public List<ZkCoreNodeProps> getReplicaProps(String collection, String shardId, String thisCoreNodeName) {
  861. return getReplicaProps(collection, shardId, thisCoreNodeName, null);
  862. }
  863. public List<ZkCoreNodeProps> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
  864. Replica.State mustMatchStateFilter) {
  865. return getReplicaProps(collection, shardId, thisCoreNodeName, mustMatchStateFilter, null);
  866. }
  867. public List<ZkCoreNodeProps> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
  868. Replica.State mustMatchStateFilter, Replica.State mustNotMatchStateFilter) {
  869. //TODO: We don't need all these getReplicaProps method overloading. Also, it's odd that the default is to return replicas of type TLOG and NRT only
  870. return getReplicaProps(collection, shardId, thisCoreNodeName, mustMatchStateFilter, null, EnumSet.of(Replica.Type.TLOG, Replica.Type.NRT));
  871. }
  872. public List<ZkCoreNodeProps> getReplicaProps(String collection, String shardId, String thisCoreNodeName,
  873. Replica.State mustMatchStateFilter, Replica.State mustNotMatchStateFilter, final EnumSet<Replica.Type> acceptReplicaType) {
  874. assert thisCoreNodeName != null;
  875. ClusterState clusterState = this.clusterState;
  876. if (clusterState == null) {
  877. return null;
  878. }
  879. final DocCollection docCollection = clusterState.getCollectionOrNull(collection);
  880. if (docCollection == null || docCollection.getSlicesMap() == null) {
  881. throw new ZooKeeperException(ErrorCode.BAD_REQUEST,
  882. "Could not find collection in zk: " + collection);
  883. }
  884. Map<String, Slice> slices = docCollection.getSlicesMap();
  885. Slice replicas = slices.get(shardId);
  886. if (replicas == null) {
  887. throw new ZooKeeperException(ErrorCode.BAD_REQUEST, "Could not find shardId in zk: " + shardId);
  888. }
  889. Map<String, Replica> shardMap = replicas.getReplicasMap();
  890. List<ZkCoreNodeProps> nodes = new ArrayList<>(shardMap.size());
  891. for (Entry<String, Replica> entry : shardMap.entrySet().stream().filter((e) -> acceptReplicaType.contains(e.getValue().getType())).collect(Collectors.toList())) {
  892. ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(entry.getValue());
  893. String coreNodeName = entry.getValue().getName();
  894. if (clusterState.liveNodesContain(nodeProps.getNodeName()) && !coreNodeName.equals(thisCoreNodeName)) {
  895. if (mustMatchStateFilter == null || mustMatchStateFilter == Replica.State.getState(nodeProps.getState())) {
  896. if (mustNotMatchStateFilter == null || mustNotMatchStateFilter != Replica.State.getState(nodeProps.getState())) {
  897. nodes.add(nodeProps);
  898. }
  899. }
  900. }
  901. }
  902. if (nodes.size() == 0) {
  903. // no replicas
  904. return null;
  905. }
  906. return nodes;
  907. }
  908. public SolrZkClient getZkClient() {
  909. return zkClient;
  910. }
  911. /**
  912. * Get a cluster property
  913. * <p>
  914. * N.B. Cluster properties are updated via ZK watchers, and so may not necessarily
  915. * be completely up-to-date. If you need to get the latest version, then use a
  916. * {@link ClusterProperties} instance.
  917. *
  918. * @param key the property to read
  919. * @param defaultValue a default value to use if no such property exists
  920. * @param <T> the type of the property
  921. * @return the cluster property, or a default if the property is not set
  922. */
  923. @SuppressWarnings("unchecked")
  924. public <T> T getClusterProperty(String key, T defaultValue) {
  925. T value = (T) Utils.getObjectByPath(clusterProperties, false, key);
  926. if (value == null)
  927. return defaultValue;
  928. return value;
  929. }
  930. /**
  931. * Same as the above but allows a full json path as a list of parts
  932. *
  933. * @param keyPath path to the property example ["collectionDefauls", "numShards"]
  934. * @param defaultValue a default value to use if no such property exists
  935. * @return the cluster property, or a default if the property is not set
  936. */
  937. public <T> T getClusterProperty(List<String> keyPath, T defaultValue) {
  938. T value = (T) Utils.getObjectByPath(clusterProperties, false, keyPath);
  939. if (value == null)
  940. return defaultValue;
  941. return value;
  942. }
  943. /**
  944. * Get all cluster properties for this cluster
  945. * <p>
  946. * N.B. Cluster properties are updated via ZK watchers, and so may not necessarily
  947. * be completely up-to-date. If you need to get the latest version, then use a
  948. * {@link ClusterProperties} instance.
  949. *
  950. * @return a Map of cluster properties
  951. */
  952. public Map<String, Object> getClusterProperties() {
  953. return Collections.unmodifiableMap(clusterProperties);
  954. }
  955. private final Watcher clusterPropertiesWatcher = event -> {
  956. // session events are not change events, and do not remove the watcher
  957. if (Watcher.Event.EventType.None.equals(event.getType())) {
  958. return;
  959. }
  960. loadClusterProperties();
  961. };
  962. @SuppressWarnings("unchecked")
  963. private void loadClusterProperties() {
  964. try {
  965. while (true) {
  966. try {
  967. byte[] data = zkClient.getData(ZkStateReader.CLUSTER_PROPS, clusterPropertiesWatcher, new Stat(), true);
  968. this.clusterProperties = ClusterProperties.convertCollectionDefaultsToNestedFormat((Map<String, Object>) Utils.fromJSON(data));
  969. log.debug("Loaded cluster properties: {}", this.clusterProperties);
  970. for (ClusterPropertiesListener listener : clusterPropertiesListeners) {
  971. listener.onChange(getClusterProperties());
  972. }
  973. return;
  974. } catch (KeeperException.NoNodeException e) {
  975. this.clusterProperties = Collections.emptyMap();
  976. log.debug("Loaded empty cluster properties");
  977. // set an exists watch, and if the node has been created since the last call,
  978. // read the data again
  979. if (zkClient.exists(ZkStateReader.CLUSTER_PROPS, clusterPropertiesWatcher, true) == null)
  980. return;
  981. }
  982. }
  983. } catch (KeeperException | InterruptedException e) {
  984. log.error("Error reading cluster properties from zookeeper", SolrZkClient.checkInterrupted(e));
  985. }
  986. }
  987. /**
  988. * Get collection properties for a given collection. If the collection is watched, simply return it from the cache,
  989. * otherwise fetch it directly from zookeeper. This is a convenience for {@code getCollectionProperties(collection,0)}
  990. *
  991. * @param collection the collection for which properties are desired
  992. * @return a map representing the key/value properties for the collection.
  993. */
  994. public Map<String, String> getCollectionProperties(final String collection) {
  995. return getCollectionProperties(collection, 0);
  996. }
  997. /**
  998. * Get and cache collection properties for a given collection. If the collection is watched, or still cached
  999. * simply return it from the cache, otherwise fetch it directly from zookeeper and retain the value for at
  1000. * least cacheForMillis milliseconds. Cached properties are watched in zookeeper and updated automatically.
  1001. * This version of {@code getCollectionProperties} should be used when properties need to be consulted
  1002. * frequently in the absence of an active {@link CollectionPropsWatcher}.
  1003. *
  1004. * @param collection The collection for which properties are desired
  1005. * @param cacheForMillis The minimum number of milliseconds to maintain a cache for the specified collection's
  1006. * properties. Setting a {@code CollectionPropsWatcher} will override this value and retain
  1007. * the cache for the life of the watcher. A lack of changes in zookeeper may allow the
  1008. * caching to remain for a greater duration up to the cycle time of {@link CacheCleaner}.
  1009. * Passing zero for this value will explicitly remove the cached copy if and only if it is
  1010. * due to expire and no watch exists. Any positive value will extend the expiration time
  1011. * if required.
  1012. * @return a map representing the key/value properties for the collection.
  1013. */
  1014. public Map<String, String> getCollectionProperties(final String collection, long cacheForMillis) {
  1015. synchronized (watchedCollectionProps) { // making decisions based on the result of a get...
  1016. Watcher watcher = null;
  1017. if (cacheForMillis > 0) {
  1018. watcher = collectionPropsWatchers.compute(collection,
  1019. (c, w) -> w == null ? new PropsWatcher(c, cacheForMillis) : w.renew(cacheForMillis));
  1020. }
  1021. VersionedCollectionProps vprops = watchedCollectionProps.get(collection);
  1022. boolean haveUnexpiredProps = vprops != null && vprops.cacheUntilNs > System.nanoTime();
  1023. long untilNs = System.nanoTime() + TimeUnit.NANOSECONDS.convert(cacheForMillis, TimeUnit.MILLISECONDS);
  1024. Map<String, String> properties;
  1025. if (haveUnexpiredProps) {
  1026. properties = vprops.props;
  1027. vprops.cacheUntilNs = Math.max(vprops.cacheUntilNs, untilNs);
  1028. } else {
  1029. try {
  1030. VersionedCollectionProps vcp = fetchCollectionProperties(collection, watcher);
  1031. properties = vcp.props;
  1032. if (cacheForMillis > 0) {
  1033. vcp.cacheUntilNs = untilNs;
  1034. watchedCollectionProps.put(collection, vcp);
  1035. } else {
  1036. // we're synchronized on watchedCollectionProps and we can only get here if we have found an expired
  1037. // vprops above, so it is safe to remove the cached value and let the GC free up some mem a bit sooner.
  1038. if (!collectionPropsObservers.containsKey(collection)) {
  1039. watchedCollectionProps.remove(collection);
  1040. }
  1041. }
  1042. } catch (Exception e) {
  1043. throw new SolrException(ErrorCode.SERVER_ERROR, "Error reading collection properties", SolrZkClient.checkInterrupted(e));
  1044. }
  1045. }
  1046. return properties;
  1047. }
  1048. }
  1049. private class VersionedCollectionProps {
  1050. int zkVersion;
  1051. Map<String, String> props;
  1052. long cacheUntilNs = 0;
  1053. VersionedCollectionProps(int zkVersion, Map<String, String> props) {
  1054. this.zkVersion = zkVersion;
  1055. this.props = props;
  1056. }
  1057. }
  1058. static String getCollectionPropsPath(final String collection) {
  1059. return COLLECTIONS_ZKNODE + '/' + collection + '/' + COLLECTION_PROPS_ZKNODE;
  1060. }
  1061. @SuppressWarnings("unchecked")
  1062. private VersionedCollectionProps fetchCollectionProperties(String collection, Watcher watcher) throws KeeperException, InterruptedException {
  1063. final String znodePath = getCollectionPropsPath(collection);
  1064. // lazy init cache cleaner once we know someone is using collection properties.
  1065. if (collectionPropsCacheCl