Skip to content

Commit

Permalink
2. Skysafari crash (brickbots#242)
Browse files Browse the repository at this point in the history
* Fix possible deadlock, reduce logging

* Rewrite the pos_server loop to be more robust against client disconnects.

Also added test script
  • Loading branch information
mrosseel authored Oct 12, 2024
1 parent 0271eac commit 3210133
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 40 deletions.
100 changes: 60 additions & 40 deletions python/PiFinder/pos_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from PiFinder.multiproclogging import MultiprocLogging
from skyfield.positionlib import position_of_radec
import sys
import time

logger = logging.getLogger("PosServer")

Expand Down Expand Up @@ -179,46 +180,6 @@ def extract_command(s):
return match.group(1) if match else None


def run_server(shared_state, p_ui_queue, log_queue):
    """Run the SkySafari position server on TCP port 4030.

    Accepts one client at a time, reads commands from it, dispatches each
    recognised command through ``lx_command_dict`` and writes the handler's
    reply back to the client.

    Args:
        shared_state: Passed unchanged to every command handler.
        p_ui_queue: Stored in the module-level ``ui_queue`` global.
        log_queue: Handed to ``MultiprocLogging.configurer``.

    Any exception is logged and the function then returns; it does not
    restart the server.
    """
    MultiprocLogging.configurer(log_queue)
    global ui_queue
    try:
        ui_queue = p_ui_queue
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
            logger.info("Starting SkySafari server")
            server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            server_socket.bind(("", 4030))
            server_socket.listen(1)
            while True:
                client_socket, _address = server_socket.accept()
                # The with-block closes the client connection on every exit
                # path, including when a handler or a socket call raises.
                with client_socket:
                    while True:
                        in_data = client_socket.recv(1024).decode()
                        if not in_data:
                            # An empty read means the peer closed the connection.
                            break

                        logger.debug("Received from skysafari: '%s'", in_data)
                        out_data = None
                        command = extract_command(in_data)
                        if command:
                            command_handler = lx_command_dict.get(command, None)
                            if command_handler:
                                out_data = command_handler(shared_state, in_data)
                            else:
                                # Logger.warn is a deprecated alias; use warning().
                                logger.warning("Unknown Command: %s", in_data)
                                out_data = not_implemented(shared_state, in_data)

                        if out_data:
                            # "0" and "1" are sent bare; every other reply
                            # gets a trailing "#".
                            if out_data in ("0", "1"):
                                reply = out_data
                            else:
                                reply = out_data + "#"
                            # sendall() keeps writing until the whole reply is
                            # out; send() may stop after a partial write.
                            client_socket.sendall(bytes(reply, "utf-8"))
    except Exception as e:
        logger.exception(e)
        logger.error("An error occurred in the skysafari server, exiting!")


lx_command_dict = {
"GD": get_telescope_dec,
"GR": get_telescope_ra,
Expand All @@ -228,3 +189,62 @@ def run_server(shared_state, p_ui_queue, log_queue):
"Sr": parse_sr_command,
"Q": respond_none,
}


def setup_server_socket(host="", port=4030, backlog=1):
    """Create, bind and start listening on the server's TCP socket.

    Args:
        host: Interface address to bind to; the empty string binds to all
            interfaces.
        port: TCP port to listen on.
        backlog: Value passed to ``listen()``.

    Returns:
        The listening ``socket.socket``. The caller owns it and must close it
        (it can be used as a context manager).

    Raises:
        OSError: If the socket cannot be configured, bound or put into
            listening mode. The half-initialised socket is closed first.
    """
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_socket.bind((host, port))
        server_socket.listen(backlog)
    except BaseException:
        # Do not leak the descriptor when setup fails part-way; callers that
        # retry in a loop would otherwise accumulate open sockets.
        server_socket.close()
        raise
    return server_socket


def handle_client(client_socket, shared_state):
    """Serve a single connected client until it disconnects or goes idle.

    Reads up to 1024 bytes at a time, extracts a command with
    ``extract_command`` and dispatches it through ``lx_command_dict``
    (falling back to ``not_implemented``). A truthy handler result is sent
    back; every reply except "0" and "1" gets a trailing "#".

    Args:
        client_socket: Connected socket returned by ``accept()``. It is always
            closed before this function returns or raises.
        shared_state: Passed unchanged to every command handler.

    Timeouts and connection errors end the session with a warning. Any other
    exception (for example one raised by a command handler) propagates to the
    caller after the socket has been closed.
    """
    # Socket objects are context managers; this guarantees the connection is
    # closed on every exit path, not only on a clean loop exit.
    with client_socket:
        try:
            client_socket.settimeout(60)
            client_socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)

            while True:
                # errors="replace" keeps stray non-UTF-8 bytes from raising
                # UnicodeDecodeError and aborting the session.
                in_data = client_socket.recv(1024).decode(errors="replace")
                if not in_data:
                    # An empty read means the peer closed the connection.
                    break

                logger.debug("Received from skysafari: %s", in_data)
                command = extract_command(in_data)
                if not command:
                    continue

                command_handler = lx_command_dict.get(command, not_implemented)
                out_data = command_handler(shared_state, in_data)
                if out_data:
                    response = out_data if out_data in ("0", "1") else out_data + "#"
                    # sendall() keeps writing until the whole reply is out;
                    # send() may stop after a partial write.
                    client_socket.sendall(response.encode())
        except socket.timeout:
            logger.warning("Connection timed out.")
        except ConnectionError:
            # Covers reset, aborted and broken-pipe disconnects alike.
            logger.warning("Client disconnected unexpectedly.")


def run_server(shared_state, p_ui_queue, log_queue):
    """Run the SkySafari position server, restarting it after failures.

    Opens the listening socket via ``setup_server_socket`` and serves clients
    one at a time with ``handle_client``. An error while serving one client
    is logged and only that client is dropped. An error on the listening
    socket itself is logged and the server is recreated after 5 seconds.

    Args:
        shared_state: Passed unchanged to ``handle_client``.
        p_ui_queue: Stored in the module-level ``ui_queue`` global.
        log_queue: Handed to ``MultiprocLogging.configurer``.

    Returns when a ``KeyboardInterrupt`` is received.
    """
    MultiprocLogging.configurer(log_queue)
    global ui_queue
    ui_queue = p_ui_queue
    logger = logging.getLogger(__name__)

    # The KeyboardInterrupt handler wraps the whole loop so that Ctrl-C is
    # also handled cleanly while sleeping before a restart.
    try:
        while True:
            try:
                with setup_server_socket() as server_socket:
                    logger.info("SkySafari server started and listening")
                    while True:
                        client_socket, address = server_socket.accept()
                        logger.info("New connection from %s", address)
                        # Closing a socket twice is harmless, so this is safe
                        # even though handle_client closes it as well.
                        with client_socket:
                            try:
                                handle_client(client_socket, shared_state)
                            except Exception:
                                # One misbehaving client must not take down
                                # the listening socket.
                                logger.exception(
                                    "Error while serving client %s", address
                                )
            except Exception:
                logger.exception("Unexpected server error")
                logger.info("Attempting to restart server in 5 seconds...")
                time.sleep(5)
    except KeyboardInterrupt:
        logger.info("Server shutting down...")
101 changes: 101 additions & 0 deletions python/tests/skysafari.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import socket
import time
import random
import logging
import select


# This is a stress test client for the LX200 server.
# It sends random commands to the server and randomly
# disconnects to test the server's robustness.

# Timestamped INFO-level logging for the whole stress-test run
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Address of the server under test
# (change SERVER_HOST if the server runs on a different machine)
SERVER_HOST = "localhost"
SERVER_PORT = 4030

# Valid LX200 commands (add more as needed)
VALID_COMMANDS = [
    ":GR#",  # Get RA
    ":GD#",  # Get DEC
    ":Q#",  # Stop all motion
    ":MS#",  # Slew to target
    ":CM#",  # Sync to target
]

# Invalid commands
INVALID_COMMANDS = [
    ":XX#",
    ":YY#",
    ":ZZ#",
    "INVALID",
    "RANDOM",
]

# How long to wait for a reply to each command, in seconds
RESPONSE_TIMEOUT = 0.5


def send_command(sock, command):
    """Send one command string and log the reply, if any arrives in time.

    Waits up to ``RESPONSE_TIMEOUT`` seconds for the socket to become
    readable. Any exception is logged and swallowed, so the function always
    returns ``None``.
    """
    try:
        logger.info(f"Sending command: {command}")
        payload = command.encode()
        sock.sendall(payload)

        # Block until the socket is readable or the timeout elapses
        readable, _writable, _errored = select.select(
            [sock], [], [], RESPONSE_TIMEOUT
        )
        if not readable:
            logger.warning(f"No response received within {RESPONSE_TIMEOUT} seconds")
            return

        raw_reply = sock.recv(1024)
        response = raw_reply.decode().strip()
        logger.info(f"Received response: {response}")
    except Exception as e:
        logger.error(f"Error sending command: {e}")


def random_disconnect(sock):
    """Close ``sock`` with 20% probability.

    Returns ``True`` when the socket was closed, ``False`` otherwise.
    """
    keep_open = not (random.random() < 0.2)
    if keep_open:
        return False

    logger.info("Randomly disconnecting...")
    sock.close()
    return True


def test_server():
    """Stress the server with random commands and random disconnects, forever.

    Each round opens a new connection to ``SERVER_HOST:SERVER_PORT``, sends
    between 5 and 15 commands (80% drawn from ``VALID_COMMANDS``, the rest
    from ``INVALID_COMMANDS``) and may drop the connection after any command.
    Connection errors are logged and the next round starts after a random
    1-5 second pause. The loop never ends on its own.
    """
    while True:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.connect((SERVER_HOST, SERVER_PORT))
                logger.info("Connected to server")

                # Send 5 to 15 commands before potential disconnect
                for _ in range(random.randint(5, 15)):
                    if random.random() < 0.8:  # 80% chance of valid command
                        command = random.choice(VALID_COMMANDS)
                    else:
                        command = random.choice(INVALID_COMMANDS)

                    send_command(sock, command)

                    if random_disconnect(sock):
                        break

                    time.sleep(random.uniform(0.1, 1))  # Wait between commands

                # fileno() returns -1 once the socket has been closed; this
                # uses the public API rather than the private sock._closed.
                if sock.fileno() != -1:
                    logger.info("Closing connection normally")
                    sock.close()

        except ConnectionRefusedError:
            logger.error("Connection refused. Is the server running?")
        except Exception as e:
            logger.error(f"Unexpected error: {e}")

        time.sleep(random.uniform(1, 5))  # Wait before attempting to reconnect


if __name__ == "__main__":
    try:
        test_server()
    except KeyboardInterrupt:
        # The loop is endless by design; Ctrl-C is the normal way to stop it.
        logger.info("Stress test interrupted, exiting")

0 comments on commit 3210133

Please sign in to comment.