Replies: 2 comments
-
Alright, after a lot of experimentation, I discovered that this works correctly if and only if I set the allocable range the way the example below does, i.e. `io_uring_register_file_alloc_range(&ring, 0, 7)`. Here's the updated example, where I'm now able to allocate two direct sockets without messing up the permitted range for the multishot accept operation:
#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_translate_exception.hpp>
#include <arpa/inet.h>
#include <assert.h>
#include <fcntl.h>
#include <liburing.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
/* Exit-status constants, listed in ascending numeric order. */
enum {
  T_EXIT_PASS = 0,
  T_EXIT_FAIL = 1,
  T_EXIT_SKIP = 77,
};
/*
 * Port of the listening socket, stored exactly as it appears in sin_port
 * (no byte-order conversion). Written by start_accept_listen() and read by
 * do_connect().
 */
static int use_port;
/*
 * Number of sparse fixed-file slots registered with the ring; the connect
 * loop in the test case runs for at most 4 * NFILES iterations.
 */
#define NFILES 20
/*
 * Bind `fd` to the address in `*addr` with the port forced to 0, then read
 * the socket's actual address back into `*addr` via getsockname().
 *
 * Returns 0 on success or -errno if bind() fails. A getsockname() failure,
 * or a port that is still 0 afterwards, trips an assert.
 */
int
t_bind_ephemeral_port( int fd, struct sockaddr_in* addr ) {
  struct sockaddr* sa = (struct sockaddr*)addr;
  socklen_t len = sizeof( *addr );

  /* Port 0: let the kernel pick the port. */
  addr->sin_port = 0;
  if ( bind( fd, sa, len ) != 0 )
    return -errno;

  /* Fetch the address (including the chosen port) back into *addr. */
  int rc = getsockname( fd, sa, &len );
  assert( rc == 0 );
  assert( addr->sin_port != 0 );
  return 0;
}
/*
 * Create a TCP listening socket on 127.0.0.1 with SO_REUSEPORT and
 * SO_REUSEADDR set, bound through t_bind_ephemeral_port() and listening
 * with a backlog of 128.
 *
 * Side effect: stores the bound port (raw sin_port value) in the file-scope
 * `use_port` so that do_connect() can reach the listener.
 *
 * Returns the listening fd. Every setup failure trips an assert.
 */
static int
start_accept_listen( void ) {
  struct sockaddr_in laddr;
  int32_t val = 1;
  int fd, ret;

  fd = socket( AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP );
  /* Fail here rather than on the first setsockopt() against a bad fd. */
  assert( fd != -1 );

  ret = setsockopt( fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof( val ) );
  assert( ret != -1 );
  ret = setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof( val ) );
  assert( ret != -1 );

  /* Zero the whole struct so no uninitialised bytes are handed to bind(). */
  memset( &laddr, 0, sizeof( laddr ) );
  laddr.sin_family = AF_INET;
  laddr.sin_addr.s_addr = inet_addr( "127.0.0.1" );

  ret = t_bind_ephemeral_port( fd, &laddr );
  assert( !ret );
  /* Only publish the port once the bind is known to have succeeded. */
  use_port = laddr.sin_port;

  ret = listen( fd, 128 );
  assert( ret != -1 );
  return fd;
}
/*
 * Open a TCP client socket (TCP_NODELAY set) and start connecting it to
 * 127.0.0.1 on the port stored in `use_port`.
 *
 * The socket is switched to non-blocking mode for the connect() call, which
 * is asserted to return -1, and then switched back to blocking mode.
 * NOTE(review): the -1 is presumably EINPROGRESS; errno is not checked.
 *
 * Returns the socket fd, or -1 if socket() fails. Other setup failures trip
 * an assert.
 */
static int
do_connect( void ) {
  struct sockaddr_in addr;
  int32_t val = 1;
  int fd, ret, flags;

  fd = socket( AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP );
  /* Report failure to the caller, which already checks for a negative fd. */
  if ( fd < 0 )
    return -1;

  ret = setsockopt( fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof( val ) );
  assert( ret != -1 );

  /* Go non-blocking so connect() returns immediately. */
  flags = fcntl( fd, F_GETFL, 0 );
  assert( flags != -1 );
  flags |= O_NONBLOCK;
  ret = fcntl( fd, F_SETFL, flags );
  assert( ret != -1 );

  /* Zero the whole struct so no uninitialised bytes are handed to connect(). */
  memset( &addr, 0, sizeof( addr ) );
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = inet_addr( "127.0.0.1" );
  addr.sin_port = use_port;

  ret = connect( fd, (struct sockaddr*)&addr, sizeof( addr ) );
  assert( ret == -1 );

  /* Restore blocking mode before handing the fd back. */
  flags = fcntl( fd, F_GETFL, 0 );
  assert( flags != -1 );
  flags &= ~O_NONBLOCK;
  ret = fcntl( fd, F_SETFL, flags );
  assert( ret != -1 );

  return fd;
}
/*
 * Close one randomly chosen fixed-file slot in [0, 7) through the ring and
 * wait for the completion.
 *
 * The modulus 7 matches the length of the alloc range registered by the test
 * case (io_uring_register_file_alloc_range(&ring, 0, 7)).
 *
 * On success, stores the closed slot index in `*expect_fd` and returns 0.
 * Returns 1 if no SQE is available, the submit does not queue exactly one
 * request, or waiting for the completion fails; `*expect_fd` is left
 * untouched in those cases. The close result (cqe->res) is only printed.
 */
static int
close_rand_direct_file( struct io_uring* ring, int* expect_fd ) {
  struct io_uring_sqe* sqe;
  struct io_uring_cqe* cqe;
  int to_close, ret;

  to_close = rand() % 7;

  sqe = io_uring_get_sqe( ring );
  /* io_uring_get_sqe() returns NULL when the submission queue is full. */
  if ( !sqe ) {
    printf( "get sqe failed\n" );
    return 1;
  }
  io_uring_prep_close_direct( sqe, to_close );
  sqe->user_data = 2;

  ret = io_uring_submit( ring );
  if ( ret != 1 ) {
    printf( "submit %d\n", ret );
    return 1;
  }

  ret = io_uring_wait_cqe( ring, &cqe );
  if ( ret ) {
    printf( "wait res close %d\n", ret );
    return 1;
  }

  printf( "closed direct %d, res %d\n", to_close, cqe->res );
  io_uring_cqe_seen( ring, cqe );

  *expect_fd = to_close;
  return 0;
}
/*
 * Reproducer: direct sockets combined with multishot accept-direct.
 *
 * Steps:
 *   1. Create a ring with 8 entries and register NFILES sparse fixed-file slots.
 *   2. Restrict automatic slot allocation to offset 0, length 7.
 *   3. Open two direct sockets at explicit slot indices 7 and 8.
 *   4. Arm a multishot accept-direct on the listening socket and make up to
 *      4 * NFILES client connections, expecting each accept completion to
 *      report the next slot index starting at 0. Once 7 slots are in use, one
 *      random slot is closed and the next accept is expected to report that
 *      same slot.
 *
 * An unexpected slot or a missing IORING_CQE_F_MORE flag is printed and ends
 * the loop; it does not fail the Catch2 test.
 */
TEST_CASE( "axboe" ) {
  srand( getpid() );

  struct io_uring_params p = {};
  struct io_uring ring;
  struct io_uring_sqe* sqe;
  struct io_uring_cqe* cqe;
  int ret, fd, i, nopen, expect_fd;

  ret = io_uring_queue_init_params( 8, &ring, &p );
  if ( ret ) {
    fprintf( stderr, "queue init: %d\n", ret );
    CHECK( false );
    return;
  }

  ret = io_uring_register_files_sparse( &ring, NFILES );
  if ( ret ) {
    fprintf( stderr, "register files: %d\n", ret );
    CHECK( false );
    io_uring_queue_exit( &ring );
    return;
  }

  fd = start_accept_listen();

  ret = io_uring_register_file_alloc_range( &ring, 0, 7 );
  if ( ret ) {
    fprintf( stderr, "register files alloc range: %d\n", ret );
    CHECK( false );
    close( fd );
    io_uring_queue_exit( &ring );
    return;
  }

  /* Two direct sockets at explicit slot indices 7 and 8. */
  for ( unsigned slot = 7; slot <= 8; slot++ ) {
    sqe = io_uring_get_sqe( &ring );
    io_uring_prep_socket_direct( sqe, AF_INET, SOCK_STREAM, 0, slot, 0 );
    io_uring_submit( &ring );

    /* cqe is only valid when the wait succeeded; do not dereference otherwise. */
    ret = io_uring_wait_cqe( &ring, &cqe );
    if ( ret ) {
      fprintf( stderr, "wait socket cqe: %d\n", ret );
      CHECK( false );
      close( fd );
      io_uring_queue_exit( &ring );
      return;
    }
    CHECK( cqe->res >= 0 );
    io_uring_cqe_seen( &ring, cqe );
  }

  /* Arm the multishot accept; its completions carry user_data 1. */
  sqe = io_uring_get_sqe( &ring );
  io_uring_prep_multishot_accept_direct( sqe, fd, NULL, NULL, 0 );
  sqe->user_data = 1;
  io_uring_submit( &ring );

  /* NOTE(review): presumably gives the accept time to arm before connecting. */
  usleep( 1000 );

  expect_fd = 0;
  nopen = 0;
  for ( i = 0; i < 4 * NFILES; i++ ) {
    int sockfd = do_connect();
    if ( sockfd < 0 )
      break;

    ret = io_uring_wait_cqe( &ring, &cqe );
    if ( ret ) {
      printf( "wait cqe %d\n", ret );
      close( sockfd );
      break;
    }

    printf( "accept res %d, flags %x\n", cqe->res, cqe->flags );

    bool stop = false;
    if ( !( cqe->flags & IORING_CQE_F_MORE ) ) {
      printf( "MORE not set, done\n" );
      stop = true;
    } else if ( expect_fd != cqe->res ) {
      printf( "expected fd %d, got %d\n", expect_fd, cqe->res );
      stop = true;
    }

    /* Always retire the CQE and the client socket, even when bailing out. */
    io_uring_cqe_seen( &ring, cqe );
    close( sockfd );
    if ( stop )
      break;

    ++nopen;
    if ( nopen == 7 ) {
      /* All 7 allocatable slots are in use: free one and expect it back next. */
      if ( close_rand_direct_file( &ring, &expect_fd ) )
        break;
      nopen--;
    } else {
      expect_fd++;
    }
  }

  io_uring_queue_exit( &ring );
  close( fd );
}
Beta Was this translation helpful? Give feedback.
-
Ha, didn't even realize I left the code in its Catch2 version. My bad. Basically, I've decided to chalk this up to a kernel bug. How do I know it's a bug in the kernel? Simple! I rewrote my code to instead set the alloc range from [inline code missing from this copy]. So basically, I noticed that my test would hit a runtime assertion, noting that multishot accept() had given me fd 10, which is a forbidden fd. Assuming I had an off-by-one error, I simply invoked [inline code missing from this copy]. I thought I was done. Then I re-ran the test and hit the runtime assertion again. Turns out, if I just sat there in my terminal running the test over and over and over again, I'd alternate between getting the expected exception and the runtime assertion. In essence, io_uring didn't respect the alloc range call, and it was racy. This is probably fixed in 6.5, but that's too new for my library to force on users. Plus, I didn't like how allocating a sub-range of the registered file table bifurcates its availability. I'm closing this discussion because there's no activity and I'm 98% confident it's not my fault, it's the kernel's. Plus, if I use the direct version of accept, I can chain it into a multishot recv which is, like, hella better. |
Beta Was this translation helpful? Give feedback.
-
If I comment back in the code that invokes `io_uring_prep_socket_direct()`, then the multishot accept direct variant no longer correctly allocates starting from offset 7. Am I invoking this incorrectly? I'm on kernel version 6.1. Is this just a bug?
Reproducer:
Beta Was this translation helpful? Give feedback.
All reactions