Fixed critical ME Problems
This commit is contained in:
@@ -9,14 +9,14 @@ use crate::error::{ProxyError, Result};
|
||||
use crate::protocol::constants::RPC_CLOSE_EXT_U32;
|
||||
|
||||
use super::MePool;
|
||||
use super::codec::RpcWriter;
|
||||
use super::wire::build_proxy_req_payload;
|
||||
use crate::crypto::SecureRandom;
|
||||
use rand::seq::SliceRandom;
|
||||
use super::registry::ConnMeta;
|
||||
|
||||
impl MePool {
|
||||
pub async fn send_proxy_req(
|
||||
&self,
|
||||
self: &Arc<Self>,
|
||||
conn_id: u64,
|
||||
target_dc: i16,
|
||||
client_addr: SocketAddr,
|
||||
@@ -32,18 +32,45 @@ impl MePool {
|
||||
self.proxy_tag.as_deref(),
|
||||
proto_flags,
|
||||
);
|
||||
let meta = ConnMeta {
|
||||
target_dc,
|
||||
client_addr,
|
||||
our_addr,
|
||||
proto_flags,
|
||||
};
|
||||
|
||||
loop {
|
||||
let ws = self.writers.read().await;
|
||||
if ws.is_empty() {
|
||||
return Err(ProxyError::Proxy("All ME connections dead".into()));
|
||||
if let Some(current) = self.registry.get_writer(conn_id).await {
|
||||
let send_res = {
|
||||
if let Ok(mut guard) = current.writer.try_lock() {
|
||||
let r = guard.send(&payload).await;
|
||||
drop(guard);
|
||||
r
|
||||
} else {
|
||||
current.writer.lock().await.send(&payload).await
|
||||
}
|
||||
};
|
||||
match send_res {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(e) => {
|
||||
warn!(error = %e, writer_id = current.writer_id, "ME write failed");
|
||||
self.remove_writer_and_reroute(current.writer_id).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let writers: Vec<(SocketAddr, Arc<Mutex<RpcWriter>>)> = ws.iter().cloned().collect();
|
||||
drop(ws);
|
||||
|
||||
let mut candidate_indices = self.candidate_indices_for_dc(&writers, target_dc).await;
|
||||
let mut writers_snapshot = {
|
||||
let ws = self.writers.read().await;
|
||||
if ws.is_empty() {
|
||||
return Err(ProxyError::Proxy("All ME connections dead".into()));
|
||||
}
|
||||
ws.clone()
|
||||
};
|
||||
|
||||
let mut candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
|
||||
if candidate_indices.is_empty() {
|
||||
// Emergency: try to connect to target DC addresses on the fly, then recompute writers
|
||||
// Emergency connect-on-demand
|
||||
let map = self.proxy_map_v4.read().await;
|
||||
if let Some(addrs) = map.get(&(target_dc as i32)) {
|
||||
let mut shuffled = addrs.clone();
|
||||
@@ -56,64 +83,71 @@ impl MePool {
|
||||
}
|
||||
}
|
||||
let ws2 = self.writers.read().await;
|
||||
let writers: Vec<(SocketAddr, Arc<Mutex<RpcWriter>>)> = ws2.iter().cloned().collect();
|
||||
writers_snapshot = ws2.clone();
|
||||
drop(ws2);
|
||||
candidate_indices = self.candidate_indices_for_dc(&writers, target_dc).await;
|
||||
candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
|
||||
}
|
||||
if candidate_indices.is_empty() {
|
||||
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
|
||||
}
|
||||
}
|
||||
|
||||
candidate_indices.sort_by_key(|idx| {
|
||||
writers_snapshot[*idx]
|
||||
.degraded
|
||||
.load(Ordering::Relaxed)
|
||||
.then_some(1usize)
|
||||
.unwrap_or(0)
|
||||
});
|
||||
|
||||
let start = self.rr.fetch_add(1, Ordering::Relaxed) as usize % candidate_indices.len();
|
||||
|
||||
// Prefer immediately available writer to avoid waiting on stalled connection.
|
||||
for offset in 0..candidate_indices.len() {
|
||||
let cidx = (start + offset) % candidate_indices.len();
|
||||
let idx = candidate_indices[cidx];
|
||||
let w = writers[idx].1.clone();
|
||||
if let Ok(mut guard) = w.try_lock() {
|
||||
let idx = candidate_indices[(start + offset) % candidate_indices.len()];
|
||||
let w = &writers_snapshot[idx];
|
||||
if let Ok(mut guard) = w.writer.try_lock() {
|
||||
let send_res = guard.send(&payload).await;
|
||||
drop(guard);
|
||||
match send_res {
|
||||
Ok(()) => return Ok(()),
|
||||
Ok(()) => {
|
||||
self.registry
|
||||
.bind_writer(conn_id, w.id, w.writer.clone(), meta.clone())
|
||||
.await;
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "ME write failed, removing dead conn");
|
||||
let mut ws = self.writers.write().await;
|
||||
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
|
||||
if ws.is_empty() {
|
||||
return Err(ProxyError::Proxy("All ME connections dead".into()));
|
||||
}
|
||||
warn!(error = %e, writer_id = w.id, "ME write failed");
|
||||
self.remove_writer_and_reroute(w.id).await;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// All writers are currently busy, wait for the selected one.
|
||||
let w = writers[candidate_indices[start]].1.clone();
|
||||
match w.lock().await.send(&payload).await {
|
||||
Ok(()) => return Ok(()),
|
||||
let w = writers_snapshot[candidate_indices[start]].clone();
|
||||
match w.writer.lock().await.send(&payload).await {
|
||||
Ok(()) => {
|
||||
self.registry
|
||||
.bind_writer(conn_id, w.id, w.writer.clone(), meta.clone())
|
||||
.await;
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "ME write failed, removing dead conn");
|
||||
let mut ws = self.writers.write().await;
|
||||
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
|
||||
if ws.is_empty() {
|
||||
return Err(ProxyError::Proxy("All ME connections dead".into()));
|
||||
}
|
||||
warn!(error = %e, writer_id = w.id, "ME write failed (blocking)");
|
||||
self.remove_writer_and_reroute(w.id).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn send_close(&self, conn_id: u64) -> Result<()> {
|
||||
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
|
||||
if let Some(w) = self.registry.get_writer(conn_id).await {
|
||||
let mut p = Vec::with_capacity(12);
|
||||
p.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
|
||||
p.extend_from_slice(&conn_id.to_le_bytes());
|
||||
if let Err(e) = w.lock().await.send(&p).await {
|
||||
if let Err(e) = w.writer.lock().await.send(&p).await {
|
||||
debug!(error = %e, "ME close write failed");
|
||||
let mut ws = self.writers.write().await;
|
||||
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
|
||||
self.remove_writer_and_reroute(w.writer_id).await;
|
||||
}
|
||||
} else {
|
||||
debug!(conn_id, "ME close skipped (writer missing)");
|
||||
@@ -129,7 +163,7 @@ impl MePool {
|
||||
|
||||
pub(super) async fn candidate_indices_for_dc(
|
||||
&self,
|
||||
writers: &[(SocketAddr, Arc<Mutex<RpcWriter>>)],
|
||||
writers: &[super::pool::MeWriter],
|
||||
target_dc: i16,
|
||||
) -> Vec<usize> {
|
||||
let mut preferred = Vec::<SocketAddr>::new();
|
||||
@@ -165,8 +199,8 @@ impl MePool {
|
||||
}
|
||||
|
||||
let mut out = Vec::new();
|
||||
for (idx, (addr, _)) in writers.iter().enumerate() {
|
||||
if preferred.iter().any(|p| p == addr) {
|
||||
for (idx, w) in writers.iter().enumerate() {
|
||||
if preferred.iter().any(|p| *p == w.addr) {
|
||||
out.push(idx);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user