Improved perf for ME

Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
This commit is contained in:
Alexey
2026-02-17 04:16:16 +03:00
parent 168fd59187
commit c03db683a5
13 changed files with 221 additions and 13 deletions

View File

@@ -174,6 +174,7 @@ impl RpcWriter {
if buf.len() >= 16 {
self.iv.copy_from_slice(&buf[buf.len() - 16..]);
}
self.writer.write_all(&buf).await.map_err(ProxyError::Io)
self.writer.write_all(&buf).await.map_err(ProxyError::Io)?;
self.writer.flush().await.map_err(ProxyError::Io)
}
}

View File

@@ -4,6 +4,7 @@ use std::sync::Arc;
use std::time::Duration;
use regex::Regex;
use httpdate;
use tracing::{debug, info, warn};
use crate::error::Result;
@@ -11,6 +12,7 @@ use crate::error::Result;
use super::MePool;
use super::secret::download_proxy_secret;
use crate::crypto::SecureRandom;
use std::time::SystemTime;
#[derive(Debug, Clone, Default)]
pub struct ProxyConfigData {
@@ -19,9 +21,29 @@ pub struct ProxyConfigData {
}
pub async fn fetch_proxy_config(url: &str) -> Result<ProxyConfigData> {
let text = reqwest::get(url)
let resp = reqwest::get(url)
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config GET failed: {e}")))?
;
if let Some(date) = resp.headers().get(reqwest::header::DATE) {
if let Ok(date_str) = date.to_str() {
if let Ok(server_time) = httpdate::parse_http_date(date_str) {
if let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
}) {
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from fetch_proxy_config Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from fetch_proxy_config Date header");
}
}
}
}
}
let text = resp
.text()
.await
.map_err(|e| crate::error::ProxyError::Proxy(format!("fetch_proxy_config read failed: {e}")))?;

View File

@@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use tracing::{debug, info, warn};
use rand::seq::SliceRandom;
@@ -10,6 +11,8 @@ use crate::crypto::SecureRandom;
use super::MePool;
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<i32, u64> = HashMap::new();
let mut last_attempt: HashMap<i32, Instant> = HashMap::new();
loop {
tokio::time::sleep(Duration::from_secs(30)).await;
// Per-DC coverage check
@@ -29,18 +32,81 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
.collect();
let has_coverage = dc_addrs.iter().any(|a| writer_addrs.contains(a));
if !has_coverage {
warn!(dc = %dc, "DC has no ME coverage, reconnecting...");
let delay = *backoff.get(dc).unwrap_or(&30);
let now = Instant::now();
if let Some(last) = last_attempt.get(dc) {
if now.duration_since(*last).as_secs() < delay {
continue;
}
}
warn!(dc = %dc, delay, "DC has no ME coverage, reconnecting...");
let mut shuffled = dc_addrs.clone();
shuffled.shuffle(&mut rand::rng());
let mut reconnected = false;
for addr in shuffled {
match pool.connect_one(addr, &rng).await {
Ok(()) => {
info!(%addr, dc = %dc, "ME reconnected for DC coverage");
backoff.insert(*dc, 30);
last_attempt.insert(*dc, now);
reconnected = true;
break;
}
Err(e) => debug!(%addr, dc = %dc, error = %e, "ME reconnect failed"),
}
}
if !reconnected {
let next = (*backoff.get(dc).unwrap_or(&30)).saturating_mul(2).min(300);
backoff.insert(*dc, next);
last_attempt.insert(*dc, now);
}
}
}
// IPv6 coverage check (if available)
let map_v6 = pool.proxy_map_v6.read().await.clone();
let writer_addrs_v6: std::collections::HashSet<SocketAddr> = pool
.writers
.read()
.await
.iter()
.map(|w| w.addr)
.collect();
for (dc, addrs) in map_v6.iter() {
let dc_addrs: Vec<SocketAddr> = addrs
.iter()
.map(|(ip, port)| SocketAddr::new(*ip, *port))
.collect();
let has_coverage = dc_addrs.iter().any(|a| writer_addrs_v6.contains(a));
if !has_coverage {
let delay = *backoff.get(dc).unwrap_or(&30);
let now = Instant::now();
if let Some(last) = last_attempt.get(dc) {
if now.duration_since(*last).as_secs() < delay {
continue;
}
}
warn!(dc = %dc, delay, "IPv6 DC has no ME coverage, reconnecting...");
let mut shuffled = dc_addrs.clone();
shuffled.shuffle(&mut rand::rng());
let mut reconnected = false;
for addr in shuffled {
match pool.connect_one(addr, &rng).await {
Ok(()) => {
info!(%addr, dc = %dc, "ME reconnected for IPv6 DC coverage");
backoff.insert(*dc, 30);
last_attempt.insert(*dc, now);
reconnected = true;
break;
}
Err(e) => debug!(%addr, dc = %dc, error = %e, "ME reconnect failed (IPv6)"),
}
}
if !reconnected {
let next = (*backoff.get(dc).unwrap_or(&30)).saturating_mul(2).min(300);
backoff.insert(*dc, next);
last_attempt.insert(*dc, now);
}
}
}
}

View File

@@ -10,6 +10,7 @@ mod reader;
mod registry;
mod send;
mod secret;
mod rotation;
mod config_updater;
mod wire;
@@ -18,10 +19,11 @@ use bytes::Bytes;
pub use health::me_health_monitor;
pub use ping::{run_me_ping, format_sample_line, MePingReport, MePingSample, MePingFamily};
pub use pool::MePool;
pub use pool_nat::{stun_probe, StunProbeResult};
pub use pool_nat::{stun_probe, detect_public_ip, StunProbeResult};
pub use registry::ConnRegistry;
pub use secret::fetch_proxy_secret;
pub use config_updater::{fetch_proxy_config, me_config_updater};
pub use rotation::me_rotation_task;
pub use wire::proto_flags_for_tag;
#[derive(Debug)]

View File

@@ -48,6 +48,7 @@ pub struct MePool {
pub(super) next_writer_id: AtomicU64,
pub(super) ping_tracker: Arc<Mutex<HashMap<i64, (std::time::Instant, u64)>>>,
pub(super) rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
pub(super) nat_reflection_cache: Arc<Mutex<Option<(std::time::Instant, std::net::SocketAddr)>>>,
pool_size: usize,
}
@@ -79,6 +80,7 @@ impl MePool {
next_writer_id: AtomicU64::new(1),
ping_tracker: Arc::new(Mutex::new(HashMap::new())),
rtt_stats: Arc::new(Mutex::new(HashMap::new())),
nat_reflection_cache: Arc::new(Mutex::new(None)),
})
}

View File

@@ -1,10 +1,12 @@
use std::net::{IpAddr, Ipv4Addr};
use std::time::Duration;
use tracing::{info, warn};
use crate::error::{ProxyError, Result};
use super::MePool;
use std::time::Instant;
#[derive(Debug, Clone, Copy)]
pub struct StunProbeResult {
@@ -17,6 +19,10 @@ pub async fn stun_probe(stun_addr: Option<String>) -> Result<Option<StunProbeRes
fetch_stun_binding(&stun_addr).await
}
pub async fn detect_public_ip() -> Option<IpAddr> {
fetch_public_ipv4_with_retry().await.ok().flatten().map(IpAddr::V4)
}
impl MePool {
pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr {
let nat_ip = self
@@ -93,6 +99,15 @@ impl MePool {
}
pub(super) async fn maybe_reflect_public_addr(&self) -> Option<std::net::SocketAddr> {
const STUN_CACHE_TTL: Duration = Duration::from_secs(600);
if let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if let Some((ts, addr)) = *cache {
if ts.elapsed() < STUN_CACHE_TTL {
return Some(addr);
}
}
}
let stun_addr = self
.nat_stun
.clone()
@@ -101,6 +116,9 @@ impl MePool {
Ok(sa) => {
if let Some(result) = sa {
info!(local = %result.local_addr, reflected = %result.reflected_addr, "NAT probe: reflected address");
if let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
*cache = Some((Instant::now(), result.reflected_addr));
}
Some(result.reflected_addr)
} else {
None

View File

@@ -32,6 +32,7 @@ pub(crate) async fn reader_loop(
cancel: CancellationToken,
) -> Result<()> {
let mut raw = enc_leftover;
let mut expected_seq: i32 = 0;
loop {
let mut tmp = [0u8; 16_384];
@@ -82,6 +83,14 @@ pub(crate) async fn reader_loop(
continue;
}
let seq_no = i32::from_le_bytes(frame[4..8].try_into().unwrap());
if seq_no != expected_seq {
warn!(seq_no, expected = expected_seq, "ME RPC seq mismatch");
expected_seq = seq_no.wrapping_add(1);
} else {
expected_seq = expected_seq.wrapping_add(1);
}
let payload = &frame[8..pe];
if payload.len() < 4 {
continue;

View File

@@ -29,7 +29,7 @@ pub struct ConnWriter {
}
pub struct ConnRegistry {
map: RwLock<HashMap<u64, mpsc::UnboundedSender<MeResponse>>>,
map: RwLock<HashMap<u64, mpsc::Sender<MeResponse>>>,
writers: RwLock<HashMap<u64, Arc<Mutex<RpcWriter>>>>,
writer_for_conn: RwLock<HashMap<u64, u64>>,
conns_for_writer: RwLock<HashMap<u64, Vec<u64>>>,
@@ -50,9 +50,9 @@ impl ConnRegistry {
}
}
pub async fn register(&self) -> (u64, mpsc::UnboundedReceiver<MeResponse>) {
pub async fn register(&self) -> (u64, mpsc::Receiver<MeResponse>) {
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
let (tx, rx) = mpsc::unbounded_channel();
let (tx, rx) = mpsc::channel(1024);
self.map.write().await.insert(id, tx);
(id, rx)
}
@@ -70,7 +70,7 @@ impl ConnRegistry {
pub async fn route(&self, id: u64, resp: MeResponse) -> bool {
let m = self.map.read().await;
if let Some(tx) = m.get(&id) {
tx.send(resp).is_ok()
tx.try_send(resp).is_ok()
} else {
false
}

View File

@@ -0,0 +1,37 @@
use std::sync::Arc;
use std::time::Duration;
use tracing::{info, warn};
use crate::crypto::SecureRandom;
use super::MePool;
/// Periodically refresh ME connections to avoid long-lived degradation.
pub async fn me_rotation_task(pool: Arc<MePool>, rng: Arc<SecureRandom>, interval: Duration) {
let interval = interval.max(Duration::from_secs(600));
loop {
tokio::time::sleep(interval).await;
let candidate = {
let ws = pool.writers.read().await;
ws.get(0).cloned()
};
let Some(w) = candidate else {
continue;
};
info!(addr = %w.addr, writer_id = w.id, "Rotating ME connection");
match pool.connect_one(w.addr, rng.as_ref()).await {
Ok(()) => {
// Remove old writer after new one is up.
pool.remove_writer_and_reroute(w.id).await;
}
Err(e) => {
warn!(addr = %w.addr, writer_id = w.id, error = %e, "ME rotation connect failed");
}
}
}
}

View File

@@ -1,6 +1,8 @@
use std::time::Duration;
use tracing::{debug, info, warn};
use std::time::SystemTime;
use httpdate;
use crate::error::{ProxyError, Result};
@@ -63,6 +65,23 @@ pub async fn download_proxy_secret() -> Result<Vec<u8>> {
)));
}
if let Some(date) = resp.headers().get(reqwest::header::DATE) {
if let Ok(date_str) = date.to_str() {
if let Ok(server_time) = httpdate::parse_http_date(date_str) {
if let Ok(skew) = SystemTime::now().duration_since(server_time).or_else(|e| {
server_time.duration_since(SystemTime::now()).map_err(|_| e)
}) {
let skew_secs = skew.as_secs();
if skew_secs > 60 {
warn!(skew_secs, "Time skew >60s detected from proxy-secret Date header");
} else if skew_secs > 30 {
warn!(skew_secs, "Time skew >30s detected from proxy-secret Date header");
}
}
}
}
}
let data = resp
.bytes()
.await

View File

@@ -1,6 +1,7 @@
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::time::Duration;
use tokio::sync::Mutex;
use tracing::{debug, warn};
@@ -38,6 +39,7 @@ impl MePool {
our_addr,
proto_flags,
};
let mut emergency_attempts = 0;
loop {
if let Some(current) = self.registry.get_writer(conn_id).await {
@@ -71,6 +73,10 @@ impl MePool {
let mut candidate_indices = self.candidate_indices_for_dc(&writers_snapshot, target_dc).await;
if candidate_indices.is_empty() {
// Emergency connect-on-demand
if emergency_attempts >= 3 {
return Err(ProxyError::Proxy("No ME writers available for target DC".into()));
}
emergency_attempts += 1;
let map = self.proxy_map_v4.read().await;
if let Some(addrs) = map.get(&(target_dc as i32)) {
let mut shuffled = addrs.clone();
@@ -82,6 +88,7 @@ impl MePool {
break;
}
}
tokio::time::sleep(Duration::from_millis(100 * emergency_attempts)).await;
let ws2 = self.writers.read().await;
writers_snapshot = ws2.clone();
drop(ws2);