Fixed critical ME Problems

This commit is contained in:
Alexey
2026-02-17 03:40:39 +03:00
parent 8bd02d8099
commit 168fd59187
7 changed files with 459 additions and 103 deletions

View File

@@ -9,14 +9,14 @@ use crate::error::{ProxyError, Result};
use crate::protocol::constants::RPC_CLOSE_EXT_U32;
use super::MePool;
use super::codec::RpcWriter;
use super::wire::build_proxy_req_payload;
use crate::crypto::SecureRandom;
use rand::seq::SliceRandom;
use super::registry::ConnMeta;
impl MePool {
pub async fn send_proxy_req(
&self,
self: &Arc<Self>,
conn_id: u64,
target_dc: i16,
client_addr: SocketAddr,
@@ -32,18 +32,45 @@ impl MePool {
self.proxy_tag.as_deref(),
proto_flags,
);
let meta = ConnMeta {
target_dc,
client_addr,
our_addr,
proto_flags,
};
loop {
let ws = self.writers.read().await;
if ws.is_empty() {
return Err(ProxyError::Proxy("All ME connections dead".into()));
if let Some(current) = self.registry.get_writer(conn_id).await {
let send_res = {
if let Ok(mut guard) = current.writer.try_lock() {
let r = guard.send(&payload).await;
drop(guard);
r
} else {
current.writer.lock().await.send(&payload).await
}
};
match send_res {
Ok(()) => return Ok(()),
Err(e) => {
warn!(error = %e, writer_id = current.writer_id, "ME write failed");
self.remove_writer_and_reroute(current.writer_id).await;
continue;
}
}
}
let writers: Vec<(SocketAddr, Arc<Mutex<RpcWriter>>)> = ws.iter().cloned().collect();
drop(ws);
let mut candidate_indices = self.candidate_indices_for_dc(&writers, target_dc).await;
let mut writers_snapshot = {
let ws = self.writers.read().await;
if ws.is_empty() {
return Err(ProxyError::Proxy("All ME connections dead".into()));
}
ws.clone()
};
let mut candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
if candidate_indices.is_empty() {
// Emergency: try to connect to target DC addresses on the fly, then recompute writers
// Emergency connect-on-demand
let map = self.proxy_map_v4.read().await;
if let Some(addrs) = map.get(&(target_dc as i32)) {
let mut shuffled = addrs.clone();
@@ -56,64 +83,71 @@ impl MePool {
}
}
let ws2 = self.writers.read().await;
let writers: Vec<(SocketAddr, Arc<Mutex<RpcWriter>>)> = ws2.iter().cloned().collect();
writers_snapshot = ws2.clone();
drop(ws2);
candidate_indices = self.candidate_indices_for_dc(&writers, target_dc).await;
candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
}
if candidate_indices.is_empty() {
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
}
candidate_indices.sort_by_key(|idx| {
writers_snapshot[*idx]
.degraded
.load(Ordering::Relaxed)
.then_some(1usize)
.unwrap_or(0)
});
let start = self.rr.fetch_add(1, Ordering::Relaxed) as usize % candidate_indices.len();
// Prefer immediately available writer to avoid waiting on stalled connection.
for offset in 0..candidate_indices.len() {
let cidx = (start + offset) % candidate_indices.len();
let idx = candidate_indices[cidx];
let w = writers[idx].1.clone();
if let Ok(mut guard) = w.try_lock() {
let idx = candidate_indices[(start + offset) % candidate_indices.len()];
let w = &writers_snapshot[idx];
if let Ok(mut guard) = w.writer.try_lock() {
let send_res = guard.send(&payload).await;
drop(guard);
match send_res {
Ok(()) => return Ok(()),
Ok(()) => {
self.registry
.bind_writer(conn_id, w.id, w.writer.clone(), meta.clone())
.await;
return Ok(());
}
Err(e) => {
warn!(error = %e, "ME write failed, removing dead conn");
let mut ws = self.writers.write().await;
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
if ws.is_empty() {
return Err(ProxyError::Proxy("All ME connections dead".into()));
}
warn!(error = %e, writer_id = w.id, "ME write failed");
self.remove_writer_and_reroute(w.id).await;
continue;
}
}
}
}
// All writers are currently busy, wait for the selected one.
let w = writers[candidate_indices[start]].1.clone();
match w.lock().await.send(&payload).await {
Ok(()) => return Ok(()),
let w = writers_snapshot[candidate_indices[start]].clone();
match w.writer.lock().await.send(&payload).await {
Ok(()) => {
self.registry
.bind_writer(conn_id, w.id, w.writer.clone(), meta.clone())
.await;
return Ok(());
}
Err(e) => {
warn!(error = %e, "ME write failed, removing dead conn");
let mut ws = self.writers.write().await;
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
if ws.is_empty() {
return Err(ProxyError::Proxy("All ME connections dead".into()));
}
warn!(error = %e, writer_id = w.id, "ME write failed (blocking)");
self.remove_writer_and_reroute(w.id).await;
}
}
}
}
pub async fn send_close(&self, conn_id: u64) -> Result<()> {
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
p.extend_from_slice(&conn_id.to_le_bytes());
if let Err(e) = w.lock().await.send(&p).await {
if let Err(e) = w.writer.lock().await.send(&p).await {
debug!(error = %e, "ME close write failed");
let mut ws = self.writers.write().await;
ws.retain(|(_, o)| !Arc::ptr_eq(o, &w));
self.remove_writer_and_reroute(w.writer_id).await;
}
} else {
debug!(conn_id, "ME close skipped (writer missing)");
@@ -129,7 +163,7 @@ impl MePool {
pub(super) async fn candidate_indices_for_dc(
&self,
writers: &[(SocketAddr, Arc<Mutex<RpcWriter>>)],
writers: &[super::pool::MeWriter],
target_dc: i16,
) -> Vec<usize> {
let mut preferred = Vec::<SocketAddr>::new();
@@ -165,8 +199,8 @@ impl MePool {
}
let mut out = Vec::new();
for (idx, (addr, _)) in writers.iter().enumerate() {
if preferred.iter().any(|p| p == addr) {
for (idx, w) in writers.iter().enumerate() {
if preferred.iter().any(|p| *p == w.addr) {
out.push(idx);
}
}