Browse Source

ready for merge in master (#964)

uevent should be formatted

Enum of smoltcp socket should be optimized.

need to add an interface for the routing subsystem

actix is still not able to run.

clean up some casually added code in other places
Samuel Dai 5 months ago
parent
commit
40d9375b6b
100 changed files with 10451 additions and 3317 deletions
  1. 54 23
      kernel/src/arch/x86_64/syscall/mod.rs
  2. 378 0
      kernel/src/arch/x86_64/syscall/nr.rs
  3. 23 2
      kernel/src/driver/acpi/sysfs.rs
  4. 1 0
      kernel/src/driver/base/device/dd.rs
  5. 5 2
      kernel/src/driver/base/device/driver.rs
  6. 2 2
      kernel/src/driver/base/device/mod.rs
  7. 103 4
      kernel/src/driver/base/kobject.rs
  8. 32 2
      kernel/src/driver/base/kset.rs
  9. 1 0
      kernel/src/driver/base/mod.rs
  10. 504 0
      kernel/src/driver/base/uevent/kobject_uevent.rs
  11. 102 0
      kernel/src/driver/base/uevent/mod.rs
  12. 17 59
      kernel/src/driver/net/e1000e/e1000e_driver.rs
  13. 5 2
      kernel/src/driver/net/irq_handle.rs
  14. 42 88
      kernel/src/driver/net/loopback.rs
  15. 177 11
      kernel/src/driver/net/mod.rs
  16. 12 12
      kernel/src/driver/net/sysfs.rs
  17. 17 60
      kernel/src/driver/net/virtio_net.rs
  18. 10 9
      kernel/src/filesystem/vfs/file.rs
  19. 2 2
      kernel/src/filesystem/vfs/open.rs
  20. 1 1
      kernel/src/filesystem/vfs/syscall.rs
  21. 5 1
      kernel/src/init/initial_kthread.rs
  22. 7 122
      kernel/src/net/mod.rs
  23. 131 134
      kernel/src/net/net_core.rs
  24. 143 0
      kernel/src/net/socket/base.rs
  25. 91 0
      kernel/src/net/socket/buffer.rs
  26. 64 0
      kernel/src/net/socket/common/epoll_items.rs
  27. 20 0
      kernel/src/net/socket/common/mod.rs
  28. 72 0
      kernel/src/net/socket/common/poll_unit.rs
  29. 118 0
      kernel/src/net/socket/common/shutdown.rs
  30. 76 0
      kernel/src/net/socket/define/ipproto.rs
  31. 32 0
      kernel/src/net/socket/define/mod.rs
  32. 110 0
      kernel/src/net/socket/define/msg_flag.rs
  33. 92 0
      kernel/src/net/socket/define/option.rs
  34. 115 0
      kernel/src/net/socket/define/option_level.rs
  35. 133 0
      kernel/src/net/socket/define/tmp.rs
  36. 43 0
      kernel/src/net/socket/endpoint.rs
  37. 121 0
      kernel/src/net/socket/family.rs
  38. 0 42
      kernel/src/net/socket/handle.rs
  39. 0 1012
      kernel/src/net/socket/inet.rs
  40. 148 0
      kernel/src/net/socket/inet/common/mod.rs
  41. 114 0
      kernel/src/net/socket/inet/common/port.rs
  42. 156 0
      kernel/src/net/socket/inet/datagram/inner.rs
  43. 453 0
      kernel/src/net/socket/inet/datagram/mod.rs
  44. 68 0
      kernel/src/net/socket/inet/ip_def.rs
  45. 150 0
      kernel/src/net/socket/inet/mod.rs
  46. 443 0
      kernel/src/net/socket/inet/stream/inner.rs
  47. 485 0
      kernel/src/net/socket/inet/stream/mod.rs
  48. 55 0
      kernel/src/net/socket/inet/syscall.rs
  49. 195 0
      kernel/src/net/socket/inode.rs
  50. 26 917
      kernel/src/net/socket/mod.rs
  51. 1268 0
      kernel/src/net/socket/netlink/af_netlink.rs
  52. 9 0
      kernel/src/net/socket/netlink/callback.rs
  53. 10 0
      kernel/src/net/socket/netlink/endpoint.rs
  54. 44 0
      kernel/src/net/socket/netlink/mod.rs
  55. 319 0
      kernel/src/net/socket/netlink/netlink.rs
  56. 56 0
      kernel/src/net/socket/netlink/netlink_proto.rs
  57. 109 0
      kernel/src/net/socket/netlink/skbuff.rs
  58. 34 0
      kernel/src/net/socket/netlink/sock.rs
  59. 0 239
      kernel/src/net/socket/unix.rs
  60. 37 0
      kernel/src/net/socket/unix/mod.rs
  61. 260 0
      kernel/src/net/socket/unix/seqpacket/inner.rs
  62. 483 0
      kernel/src/net/socket/unix/seqpacket/mod.rs
  63. 243 0
      kernel/src/net/socket/unix/stream/inner.rs
  64. 478 0
      kernel/src/net/socket/unix/stream/mod.rs
  65. 28 0
      kernel/src/net/socket/utils.rs
  66. 166 561
      kernel/src/net/syscall.rs
  67. 347 0
      kernel/src/net/syscall_util.rs
  68. 2 1
      kernel/src/process/mod.rs
  69. 8 8
      kernel/src/syscall/mod.rs
  70. 1 1
      tools/run-qemu.sh
  71. 3 0
      user/apps/ping/.gitignore
  72. 18 0
      user/apps/ping/Cargo.toml
  73. 56 0
      user/apps/ping/Makefile
  74. 23 0
      user/apps/ping/README.md
  75. 50 0
      user/apps/ping/src/args.rs
  76. 45 0
      user/apps/ping/src/config.rs
  77. 10 0
      user/apps/ping/src/error.rs
  78. 23 0
      user/apps/ping/src/main.rs
  79. 151 0
      user/apps/ping/src/ping.rs
  80. 2 0
      user/apps/test-uevent/.cargo/config.toml
  81. 3 0
      user/apps/test-uevent/.gitignore
  82. 12 0
      user/apps/test-uevent/Cargo.toml
  83. 56 0
      user/apps/test-uevent/Makefile
  84. 14 0
      user/apps/test-uevent/README.md
  85. 150 0
      user/apps/test-uevent/src/main.rs
  86. 3 0
      user/apps/test_seqpacket/.gitignore
  87. 12 0
      user/apps/test_seqpacket/Cargo.toml
  88. 56 0
      user/apps/test_seqpacket/Makefile
  89. 14 0
      user/apps/test_seqpacket/README.md
  90. 190 0
      user/apps/test_seqpacket/src/main.rs
  91. 39 0
      user/apps/test_seqpacket/src/seq_pair.rs
  92. 155 0
      user/apps/test_seqpacket/src/seq_socket.rs
  93. 3 0
      user/apps/test_unix_stream_socket/.gitignore
  94. 11 0
      user/apps/test_unix_stream_socket/Cargo.toml
  95. 56 0
      user/apps/test_unix_stream_socket/Makefile
  96. 5 0
      user/apps/test_unix_stream_socket/README.md
  97. 153 0
      user/apps/test_unix_stream_socket/src/main.rs
  98. 24 0
      user/dadk/config/ping_0_1_0.dadk
  99. 29 0
      user/dadk/config/test_seqpacket_0_1_0.dadk
  100. 29 0
      user/dadk/config/test_stream_socket_0_1_0.dadk

+ 54 - 23
kernel/src/arch/x86_64/syscall/mod.rs

@@ -1,7 +1,7 @@
 use crate::{
     arch::{
         ipc::signal::X86_64SignalArch,
-        syscall::nr::{SYS_ARCH_PRCTL, SYS_RT_SIGRETURN},
+        syscall::nr::{SysCall, SYS_ARCH_PRCTL, SYS_RT_SIGRETURN},
         CurrentIrqArch,
     },
     exception::InterruptArch,
@@ -53,7 +53,7 @@ macro_rules! syscall_return {
 
         if $show {
             let pid = ProcessManager::current_pcb().pid();
-            debug!("syscall return:pid={:?},ret= {:?}\n", pid, ret as isize);
+            debug!("[SYS] [Pid: {:?}] [Retn: {:?}]", pid, ret as i64);
         }
 
         unsafe {
@@ -63,6 +63,24 @@ macro_rules! syscall_return {
     }};
 }
 
+macro_rules! normal_syscall_return {
+    ($val:expr, $regs:expr, $show:expr) => {{
+        let ret = $val;
+
+        if $show {
+            let pid = ProcessManager::current_pcb().pid();
+            debug!("[SYS] [Pid: {:?}] [Retn: {:?}]", pid, ret);
+        }
+
+        $regs.rax = ret.unwrap_or_else(|e| e.to_posix_errno() as usize) as u64;
+
+        unsafe {
+            CurrentIrqArch::interrupt_disable();
+        }
+        return;
+    }};
+}
+
 #[no_mangle]
 pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) {
     let syscall_num = frame.rax as usize;
@@ -87,15 +105,38 @@ pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) {
     ];
     mfence();
     let pid = ProcessManager::current_pcb().pid();
-    let show = false;
-    // let show = if syscall_num != SYS_SCHED && pid.data() >= 7 {
-    //     true
-    // } else {
-    //     false
-    // };
-
-    if show {
-        debug!("syscall: pid: {:?}, num={:?}\n", pid, syscall_num);
+    let mut show = (syscall_num != SYS_SCHED) && (pid.data() >= 7);
+    // let mut show = true;
+
+    let to_print = SysCall::try_from(syscall_num);
+    if let Ok(to_print) = to_print {
+        use SysCall::*;
+        match to_print {
+            SYS_ACCEPT | SYS_ACCEPT4 | SYS_BIND | SYS_CONNECT | SYS_SHUTDOWN | SYS_LISTEN => {
+                show &= true;
+            }
+            SYS_RECVFROM | SYS_SENDTO | SYS_SENDMSG | SYS_RECVMSG => {
+                show &= true;
+            }
+            SYS_SOCKET | SYS_GETSOCKNAME | SYS_GETPEERNAME | SYS_SOCKETPAIR | SYS_SETSOCKOPT
+            | SYS_GETSOCKOPT => {
+                show &= true;
+            }
+            SYS_OPEN | SYS_OPENAT | SYS_CREAT | SYS_CLOSE => {
+                show &= true;
+            }
+            SYS_READ | SYS_WRITE | SYS_READV | SYS_WRITEV | SYS_PREAD64 | SYS_PWRITE64
+            | SYS_PREADV | SYS_PWRITEV | SYS_PREADV2 => {
+                show &= true;
+            }
+            _ => {
+                show &= false;
+            }
+        }
+
+        if show {
+            debug!("[SYS] [Pid: {:?}] [Call: {:?}]", pid, to_print);
+        }
     }
 
     // Arch specific syscall
@@ -108,21 +149,11 @@ pub extern "sysv64" fn syscall_handler(frame: &mut TrapFrame) {
             );
         }
         SYS_ARCH_PRCTL => {
-            syscall_return!(
-                Syscall::arch_prctl(args[0], args[1])
-                    .unwrap_or_else(|e| e.to_posix_errno() as usize),
-                frame,
-                show
-            );
+            normal_syscall_return!(Syscall::arch_prctl(args[0], args[1]), frame, show);
         }
         _ => {}
     }
-    syscall_return!(
-        Syscall::handle(syscall_num, &args, frame).unwrap_or_else(|e| e.to_posix_errno() as usize)
-            as u64,
-        frame,
-        show
-    );
+    normal_syscall_return!(Syscall::handle(syscall_num, &args, frame), frame, show);
 }
 
 /// 系统调用初始化

+ 378 - 0
kernel/src/arch/x86_64/syscall/nr.rs

@@ -355,3 +355,381 @@ pub const SYS_WAIT4: usize = 61;
 pub const SYS_WAITID: usize = 247;
 pub const SYS_WRITE: usize = 1;
 pub const SYS_WRITEV: usize = 20;
+
+use num_traits::{FromPrimitive, ToPrimitive};
+use system_error::SystemError;
+#[allow(non_camel_case_types)]
+#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
+pub enum SysCall {
+    SYS__SYSCTL = 156,
+    SYS_ACCEPT = 43,
+    SYS_ACCEPT4 = 288,
+    SYS_ACCESS = 21,
+    SYS_ACCT = 163,
+    SYS_ADD_KEY = 248,
+    SYS_ADJTIMEX = 159,
+    SYS_AFS_SYSCALL = 183,
+    SYS_ALARM = 37,
+    SYS_ARCH_PRCTL = 158,
+    SYS_BIND = 49,
+    SYS_BPF = 321,
+    SYS_BRK = 12,
+    SYS_CAPGET = 125,
+    SYS_CAPSET = 126,
+    SYS_CHDIR = 80,
+    SYS_CHMOD = 90,
+    SYS_CHOWN = 92,
+    SYS_CHROOT = 161,
+    SYS_CLOCK_ADJTIME = 305,
+    SYS_CLOCK_GETRES = 229,
+    SYS_CLOCK_GETTIME = 228,
+    SYS_CLOCK_NANOSLEEP = 230,
+    SYS_CLOCK_SETTIME = 227,
+    SYS_CLONE = 56,
+    SYS_CLONE3 = 435,
+    SYS_CLOSE = 3,
+    SYS_CLOSE_RANGE = 436,
+    SYS_CONNECT = 42,
+    SYS_COPY_FILE_RANGE = 326,
+    SYS_CREAT = 85,
+    SYS_CREATE_MODULE = 174,
+    SYS_DELETE_MODULE = 176,
+    SYS_DUP = 32,
+    SYS_DUP2 = 33,
+    SYS_DUP3 = 292,
+    SYS_EPOLL_CREATE = 213,
+    SYS_EPOLL_CREATE1 = 291,
+    SYS_EPOLL_CTL = 233,
+    SYS_EPOLL_CTL_OLD = 214,
+    SYS_EPOLL_PWAIT = 281,
+    SYS_EPOLL_PWAIT2 = 441,
+    SYS_EPOLL_WAIT = 232,
+    SYS_EPOLL_WAIT_OLD = 215,
+    SYS_EVENTFD = 284,
+    SYS_EVENTFD2 = 290,
+    SYS_EXECVE = 59,
+    SYS_EXECVEAT = 322,
+    SYS_EXIT = 60,
+    SYS_EXIT_GROUP = 231,
+    SYS_FACCESSAT = 269,
+    SYS_FACCESSAT2 = 439,
+    SYS_FADVISE64 = 221,
+    SYS_FALLOCATE = 285,
+    SYS_FANOTIFY_INIT = 300,
+    SYS_FANOTIFY_MARK = 301,
+    SYS_FCHDIR = 81,
+    SYS_FCHMOD = 91,
+    SYS_FCHMODAT = 268,
+    SYS_FCHOWN = 93,
+    SYS_FCHOWNAT = 260,
+    SYS_FCNTL = 72,
+    SYS_FDATASYNC = 75,
+    SYS_FGETXATTR = 193,
+    SYS_FINIT_MODULE = 313,
+    SYS_FLISTXATTR = 196,
+    SYS_FLOCK = 73,
+    SYS_FORK = 57,
+    SYS_FREMOVEXATTR = 199,
+    SYS_FSCONFIG = 431,
+    SYS_FSETXATTR = 190,
+    SYS_FSMOUNT = 432,
+    SYS_FSOPEN = 430,
+    SYS_FSPICK = 433,
+    SYS_FSTAT = 5,
+    SYS_FSTATFS = 138,
+    SYS_FSYNC = 74,
+    SYS_FTRUNCATE = 77,
+    SYS_FUTEX = 202,
+    SYS_FUTIMESAT = 261,
+    SYS_GET_KERNEL_SYMS = 177,
+    SYS_GET_MEMPOLICY = 239,
+    SYS_GET_ROBUST_LIST = 274,
+    SYS_GET_THREAD_AREA = 211,
+    SYS_GETCPU = 309,
+    SYS_GETCWD = 79,
+    SYS_GETDENTS = 78,
+    SYS_GETDENTS64 = 217,
+    SYS_GETEGID = 108,
+    SYS_GETEUID = 107,
+    SYS_GETGID = 104,
+    SYS_GETGROUPS = 115,
+    SYS_GETITIMER = 36,
+    SYS_GETPEERNAME = 52,
+    SYS_GETPGID = 121,
+    SYS_GETPGRP = 111,
+    SYS_GETPID = 39,
+    SYS_GETPMSG = 181,
+    SYS_GETPPID = 110,
+    SYS_GETPRIORITY = 140,
+    SYS_GETRANDOM = 318,
+    SYS_GETRESGID = 120,
+    SYS_GETRESUID = 118,
+    SYS_GETRLIMIT = 97,
+    SYS_GETRUSAGE = 98,
+    SYS_GETSID = 124,
+    SYS_GETSOCKNAME = 51,
+    SYS_GETSOCKOPT = 55,
+    SYS_GETTID = 186,
+    SYS_GETTIMEOFDAY = 96,
+    SYS_GETUID = 102,
+    SYS_GETXATTR = 191,
+    SYS_INIT_MODULE = 175,
+    SYS_INOTIFY_ADD_WATCH = 254,
+    SYS_INOTIFY_INIT = 253,
+    SYS_INOTIFY_INIT1 = 294,
+    SYS_INOTIFY_RM_WATCH = 255,
+    SYS_IO_CANCEL = 210,
+    SYS_IO_DESTROY = 207,
+    SYS_IO_GETEVENTS = 208,
+    SYS_IO_PGETEVENTS = 333,
+    SYS_IO_SETUP = 206,
+    SYS_IO_SUBMIT = 209,
+    SYS_IO_URING_ENTER = 426,
+    SYS_IO_URING_REGISTER = 427,
+    SYS_IO_URING_SETUP = 425,
+    SYS_IOCTL = 16,
+    SYS_IOPERM = 173,
+    SYS_IOPL = 172,
+    SYS_IOPRIO_GET = 252,
+    SYS_IOPRIO_SET = 251,
+    SYS_KCMP = 312,
+    SYS_KEXEC_FILE_LOAD = 320,
+    SYS_KEXEC_LOAD = 246,
+    SYS_KEYCTL = 250,
+    SYS_KILL = 62,
+    SYS_LCHOWN = 94,
+    SYS_LGETXATTR = 192,
+    SYS_LINK = 86,
+    SYS_LINKAT = 265,
+    SYS_LISTEN = 50,
+    SYS_LISTXATTR = 194,
+    SYS_LLISTXATTR = 195,
+    SYS_LOOKUP_DCOOKIE = 212,
+    SYS_LREMOVEXATTR = 198,
+    SYS_LSEEK = 8,
+    SYS_LSETXATTR = 189,
+    SYS_LSTAT = 6,
+    SYS_MADVISE = 28,
+    SYS_MBIND = 237,
+    SYS_MEMBARRIER = 324,
+    SYS_MEMFD_CREATE = 319,
+    SYS_MIGRATE_PAGES = 256,
+    SYS_MINCORE = 27,
+    SYS_MKDIR = 83,
+    SYS_MKDIRAT = 258,
+    SYS_MKNOD = 133,
+    SYS_MKNODAT = 259,
+    SYS_MLOCK = 149,
+    SYS_MLOCK2 = 325,
+    SYS_MLOCKALL = 151,
+    SYS_MMAP = 9,
+    SYS_MODIFY_LDT = 154,
+    SYS_MOUNT = 165,
+    SYS_MOUNT_SETATTR = 442,
+    SYS_MOVE_MOUNT = 429,
+    SYS_MOVE_PAGES = 279,
+    SYS_MPROTECT = 10,
+    SYS_MQ_GETSETATTR = 245,
+    SYS_MQ_NOTIFY = 244,
+    SYS_MQ_OPEN = 240,
+    SYS_MQ_TIMEDRECEIVE = 243,
+    SYS_MQ_TIMEDSEND = 242,
+    SYS_MQ_UNLINK = 241,
+    SYS_MREMAP = 25,
+    SYS_MSGCTL = 71,
+    SYS_MSGGET = 68,
+    SYS_MSGRCV = 70,
+    SYS_MSGSND = 69,
+    SYS_MSYNC = 26,
+    SYS_MUNLOCK = 150,
+    SYS_MUNLOCKALL = 152,
+    SYS_MUNMAP = 11,
+    SYS_NAME_TO_HANDLE_AT = 303,
+    SYS_NANOSLEEP = 35,
+    SYS_NEWFSTATAT = 262,
+    SYS_NFSSERVCTL = 180,
+    SYS_OPEN = 2,
+    SYS_OPEN_BY_HANDLE_AT = 304,
+    SYS_OPEN_TREE = 428,
+    SYS_OPENAT = 257,
+    SYS_OPENAT2 = 437,
+    SYS_PAUSE = 34,
+    SYS_PERF_EVENT_OPEN = 298,
+    SYS_PERSONALITY = 135,
+    SYS_PIDFD_GETFD = 438,
+    SYS_PIDFD_OPEN = 434,
+    SYS_PIDFD_SEND_SIGNAL = 424,
+    SYS_PIPE = 22,
+    SYS_PIPE2 = 293,
+    SYS_PIVOT_ROOT = 155,
+    SYS_PKEY_ALLOC = 330,
+    SYS_PKEY_FREE = 331,
+    SYS_PKEY_MPROTECT = 329,
+    SYS_POLL = 7,
+    SYS_PPOLL = 271,
+    SYS_PRCTL = 157,
+    SYS_PREAD64 = 17,
+    SYS_PREADV = 295,
+    SYS_PREADV2 = 327,
+    SYS_PRLIMIT64 = 302,
+    SYS_PROCESS_MADVISE = 440,
+    SYS_PROCESS_VM_READV = 310,
+    SYS_PROCESS_VM_WRITEV = 311,
+    SYS_PSELECT6 = 270,
+    SYS_PTRACE = 101,
+    SYS_PUTPMSG = 182,
+    SYS_PWRITE64 = 18,
+    SYS_PWRITEV = 296,
+    SYS_PWRITEV2 = 328,
+    SYS_QUERY_MODULE = 178,
+    SYS_QUOTACTL = 179,
+    SYS_READ = 0,
+    SYS_READAHEAD = 187,
+    SYS_READLINK = 89,
+    SYS_READLINKAT = 267,
+    SYS_READV = 19,
+    SYS_REBOOT = 169,
+    SYS_RECVFROM = 45,
+    SYS_RECVMMSG = 299,
+    SYS_RECVMSG = 47,
+    SYS_REMAP_FILE_PAGES = 216,
+    SYS_REMOVEXATTR = 197,
+    SYS_RENAME = 82,
+    SYS_RENAMEAT = 264,
+    SYS_RENAMEAT2 = 316,
+    SYS_REQUEST_KEY = 249,
+    SYS_RESTART_SYSCALL = 219,
+    SYS_RMDIR = 84,
+    SYS_RSEQ = 334,
+    SYS_RT_SIGACTION = 13,
+    SYS_RT_SIGPENDING = 127,
+    SYS_RT_SIGPROCMASK = 14,
+    SYS_RT_SIGQUEUEINFO = 129,
+    SYS_RT_SIGRETURN = 15,
+    SYS_RT_SIGSUSPEND = 130,
+    SYS_RT_SIGTIMEDWAIT = 128,
+    SYS_RT_TGSIGQUEUEINFO = 297,
+    SYS_SCHED_GET_PRIORITY_MAX = 146,
+    SYS_SCHED_GET_PRIORITY_MIN = 147,
+    SYS_SCHED_GETAFFINITY = 204,
+    SYS_SCHED_GETATTR = 315,
+    SYS_SCHED_GETPARAM = 143,
+    SYS_SCHED_GETSCHEDULER = 145,
+    SYS_SCHED_RR_GET_INTERVAL = 148,
+    SYS_SCHED_SETAFFINITY = 203,
+    SYS_SCHED_SETATTR = 314,
+    SYS_SCHED_SETPARAM = 142,
+    SYS_SCHED_SETSCHEDULER = 144,
+    SYS_SCHED_YIELD = 24,
+    SYS_SECCOMP = 317,
+    SYS_SECURITY = 185,
+    SYS_SELECT = 23,
+    SYS_SEMCTL = 66,
+    SYS_SEMGET = 64,
+    SYS_SEMOP = 65,
+    SYS_SEMTIMEDOP = 220,
+    SYS_SENDFILE = 40,
+    SYS_SENDMMSG = 307,
+    SYS_SENDMSG = 46,
+    SYS_SENDTO = 44,
+    SYS_SET_MEMPOLICY = 238,
+    SYS_SET_ROBUST_LIST = 273,
+    SYS_SET_THREAD_AREA = 205,
+    SYS_SET_TID_ADDRESS = 218,
+    SYS_SETDOMAINNAME = 171,
+    SYS_SETFSGID = 123,
+    SYS_SETFSUID = 122,
+    SYS_SETGID = 106,
+    SYS_SETGROUPS = 116,
+    SYS_SETHOSTNAME = 170,
+    SYS_SETITIMER = 38,
+    SYS_SETNS = 308,
+    SYS_SETPGID = 109,
+    SYS_SETPRIORITY = 141,
+    SYS_SETREGID = 114,
+    SYS_SETRESGID = 119,
+    SYS_SETRESUID = 117,
+    SYS_SETREUID = 113,
+    SYS_SETRLIMIT = 160,
+    SYS_SETSID = 112,
+    SYS_SETSOCKOPT = 54,
+    SYS_SETTIMEOFDAY = 164,
+    SYS_SETUID = 105,
+    SYS_SETXATTR = 188,
+    SYS_SHMAT = 30,
+    SYS_SHMCTL = 31,
+    SYS_SHMDT = 67,
+    SYS_SHMGET = 29,
+    SYS_SHUTDOWN = 48,
+    SYS_SIGALTSTACK = 131,
+    SYS_SIGNALFD = 282,
+    SYS_SIGNALFD4 = 289,
+    SYS_SOCKET = 41,
+    SYS_SOCKETPAIR = 53,
+    SYS_SPLICE = 275,
+    SYS_STAT = 4,
+    SYS_STATFS = 137,
+    SYS_STATX = 332,
+    SYS_SWAPOFF = 168,
+    SYS_SWAPON = 167,
+    SYS_SYMLINK = 88,
+    SYS_SYMLINKAT = 266,
+    SYS_SYNC = 162,
+    SYS_SYNC_FILE_RANGE = 277,
+    SYS_SYNCFS = 306,
+    SYS_SYSFS = 139,
+    SYS_SYSINFO = 99,
+    SYS_SYSLOG = 103,
+    SYS_TEE = 276,
+    SYS_TGKILL = 234,
+    SYS_TIME = 201,
+    SYS_TIMER_CREATE = 222,
+    SYS_TIMER_DELETE = 226,
+    SYS_TIMER_GETOVERRUN = 225,
+    SYS_TIMER_GETTIME = 224,
+    SYS_TIMER_SETTIME = 223,
+    SYS_TIMERFD_CREATE = 283,
+    SYS_TIMERFD_GETTIME = 287,
+    SYS_TIMERFD_SETTIME = 286,
+    SYS_TIMES = 100,
+    SYS_TKILL = 200,
+    SYS_TRUNCATE = 76,
+    SYS_TUXCALL = 184,
+    SYS_UMASK = 95,
+    SYS_UMOUNT2 = 166,
+    SYS_UNAME = 63,
+    SYS_UNLINK = 87,
+    SYS_UNLINKAT = 263,
+    SYS_UNSHARE = 272,
+    SYS_USELIB = 134,
+    SYS_USERFAULTFD = 323,
+    SYS_USTAT = 136,
+    SYS_UTIME = 132,
+    SYS_UTIMENSAT = 280,
+    SYS_UTIMES = 235,
+    SYS_VFORK = 58,
+    SYS_VHANGUP = 153,
+    SYS_VMSPLICE = 278,
+    SYS_VSERVER = 236,
+    SYS_WAIT4 = 61,
+    SYS_WAITID = 247,
+    SYS_WRITE = 1,
+    SYS_WRITEV = 20,
+}
+
+impl TryFrom<usize> for SysCall {
+    type Error = SystemError;
+
+    fn try_from(value: usize) -> Result<Self, Self::Error> {
+        match <Self as FromPrimitive>::from_usize(value) {
+            Some(p) => Ok(p),
+            None => Err(SystemError::EINVAL),
+        }
+    }
+}
+
+impl From<SysCall> for usize {
+    fn from(value: SysCall) -> Self {
+        <SysCall as ToPrimitive>::to_usize(&value).unwrap()
+    }
+}

+ 23 - 2
kernel/src/driver/acpi/sysfs.rs

@@ -22,7 +22,8 @@ use log::{debug, error, warn};
 use system_error::SystemError;
 
 use super::{acpi_kset, AcpiManager};
-
+use crate::driver::base::uevent::kobject_uevent::kobject_uevent;
+use crate::driver::base::uevent::KobjectAction;
 static mut __HOTPLUG_KSET_INSTANCE: Option<Arc<KSet>> = None;
 static mut __ACPI_TABLES_KSET_INSTANCE: Option<Arc<KSet>> = None;
 static mut __ACPI_TABLES_DATA_KSET_INSTANCE: Option<Arc<KSet>> = None;
@@ -115,7 +116,27 @@ impl AcpiManager {
             acpi_table_attr_list().write().push(attr);
             self.acpi_table_data_init(&header)?;
         }
-
+        // TODO:UEVENT
+        unsafe {
+            let _ = kobject_uevent(
+                acpi_tables_kset.clone() as Arc<dyn KObject>,
+                KobjectAction::KOBJADD,
+            );
+            let _ = kobject_uevent(
+                __ACPI_TABLES_DATA_KSET_INSTANCE
+                    .as_ref()
+                    .map(|kset| kset.clone() as Arc<dyn KObject>)
+                    .unwrap(),
+                KobjectAction::KOBJADD,
+            );
+            let _ = kobject_uevent(
+                __ACPI_TABLES_DYNAMIC_KSET_INSTANCE
+                    .as_ref()
+                    .map(|kset| kset.clone() as Arc<dyn KObject>)
+                    .unwrap(),
+                KobjectAction::KOBJADD,
+            );
+        }
         return Ok(());
     }
 

+ 1 - 0
kernel/src/driver/base/device/dd.rs

@@ -571,6 +571,7 @@ impl DriverManager {
         }
 
         // todo: 发送kobj bind的uevent
+        // kobject_uevent();
     }
 
     fn driver_is_bound(&self, device: &Arc<dyn Device>) -> bool {

+ 5 - 2
kernel/src/driver/base/device/driver.rs

@@ -2,6 +2,8 @@ use super::{
     bus::{bus_manager, Bus},
     Device, DeviceMatchName, DeviceMatcher, IdTable,
 };
+use crate::driver::base::uevent::kobject_uevent::kobject_uevent;
+use crate::driver::base::uevent::KobjectAction;
 use crate::{
     driver::base::{
         device::{bus::BusNotifyEvent, dd::DeviceAttrCoredump, device_manager},
@@ -17,7 +19,6 @@ use alloc::{
 use core::fmt::Debug;
 use log::error;
 use system_error::SystemError;
-
 /// @brief: Driver error
 #[allow(dead_code)]
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -218,7 +219,9 @@ impl DriverManager {
                 bus_manager().remove_driver(&driver);
             })?;
 
-        // todo: 发送uevent
+        // todo: 发送uevent,类型问题
+        let _ = kobject_uevent(driver.clone() as Arc<dyn KObject>, KobjectAction::KOBJADD);
+        // deferred_probe_extend_timeout();
 
         return Ok(());
     }

+ 2 - 2
kernel/src/driver/base/device/mod.rs

@@ -506,7 +506,7 @@ impl DeviceManager {
         }
         let kobject_parent = self.get_device_parent(&device, deivce_parent)?;
         if let Some(ref kobj) = kobject_parent {
-            log::debug!("kobject parent: {:?}", kobj.name());
+            log::info!("kobject parent: {:?}", kobj.name());
         }
         if let Some(kobject_parent) = kobject_parent {
             // debug!(
@@ -547,7 +547,7 @@ impl DeviceManager {
         }
 
         // todo: 发送uevent: KOBJ_ADD
-
+        // kobject_uevent();
         // probe drivers for a new device
         bus_probe_device(&device);
 

+ 103 - 4
kernel/src/driver/base/kobject.rs

@@ -1,6 +1,7 @@
 use core::{any::Any, fmt::Debug, hash::Hash, ops::Deref};
 
 use alloc::{
+    boxed::Box,
     string::String,
     sync::{Arc, Weak},
 };
@@ -21,7 +22,7 @@ use crate::{
 
 use system_error::SystemError;
 
-use super::kset::KSet;
+use super::{kset::KSet, uevent::kobject_uevent};
 
 pub trait KObject: Any + Send + Sync + Debug + CastFromSync {
     fn as_any_ref(&self) -> &dyn core::any::Any;
@@ -103,10 +104,9 @@ bitflags! {
         const ADD_UEVENT_SENT = 1 << 1;
         const REMOVE_UEVENT_SENT = 1 << 2;
         const INITIALIZED = 1 << 3;
+        const UEVENT_SUPPRESS = 1 << 4;
     }
-
 }
-
 #[derive(Debug)]
 pub struct LockedKObjectState(RwLock<KObjectState>);
 
@@ -251,7 +251,7 @@ impl KObjectManager {
         }
 
         // todo: 发送uevent: KOBJ_REMOVE
-
+        // kobject_uevent();
         sysfs_instance().remove_dir(&kobj);
         kobj.update_kobj_state(None, Some(KObjectState::IN_SYSFS));
         let kset = kobj.kset();
@@ -260,6 +260,105 @@ impl KObjectManager {
         }
         kobj.set_parent(None);
     }
+
+    fn get_kobj_path_length(kobj: &Arc<dyn KObject>) -> usize {
+        log::info!("get_kobj_path_length() kobj:{:?}", kobj.name());
+        let mut length = 1;
+        let mut parent = kobj.parent().unwrap().upgrade().unwrap();
+        /* walk up the ancestors until we hit the one pointing to the
+         * root.
+         * Add 1 to strlen for leading '/' of each level.
+         */
+        let mut length = 0; // 确保 length 被正确初始化
+        let mut iteration_count = 0; // 用于记录迭代次数
+        const MAX_ITERATIONS: usize = 10; // 最大迭代次数
+
+        loop {
+            log::info!(
+                "Iteration {}: parent.name():{:?}",
+                iteration_count,
+                parent.name()
+            );
+            length += parent.name().len() + 1;
+            if let Some(weak_parent) = parent.parent() {
+                if let Some(upgraded_parent) = weak_parent.upgrade() {
+                    parent = upgraded_parent;
+                } else {
+                    log::error!("Failed to upgrade weak reference to parent");
+                    break;
+                }
+            } else {
+                log::error!("Parent has no parent");
+                break;
+            }
+
+            iteration_count += 1;
+            if iteration_count >= MAX_ITERATIONS {
+                log::error!("Reached maximum iteration count, breaking to avoid infinite loop");
+                break;
+            }
+        }
+        return length;
+    }
+
+    /*
+        static void fill_kobj_path(struct kobject *kobj, char *path, int length)
+    {
+        struct kobject *parent;
+
+        --length;
+        for (parent = kobj; parent; parent = parent->parent) {
+            int cur = strlen(kobject_name(parent));
+            /* back up enough to print this name with '/' */
+            length -= cur;
+            memcpy(path + length, kobject_name(parent), cur);
+            *(path + --length) = '/';
+        }
+
+        pr_debug("kobject: '%s' (%p): %s: path = '%s'\n", kobject_name(kobj),
+             kobj, __func__, path);
+    }
+         */
+    fn fill_kobj_path(kobj: &Arc<dyn KObject>, path: &mut [u8], length: usize) {
+        let mut parent = kobj.parent().unwrap().upgrade().unwrap();
+        let mut length = length;
+        length -= 1;
+        loop {
+            log::info!("fill_kobj_path parent.name():{:?}", parent.name());
+            let cur = parent.name().len();
+            if length < cur + 1 {
+                // 如果剩余长度不足以容纳当前名称和分隔符,则退出
+                break;
+            }
+            length -= cur;
+            let parent_name = parent.name();
+            let name = parent_name.as_bytes();
+            for i in 0..cur {
+                path[length + i] = name[i];
+            }
+            length -= 1;
+            path[length] = '/' as u8;
+            if let Some(weak_parent) = parent.parent() {
+                if let Some(upgraded_parent) = weak_parent.upgrade() {
+                    parent = upgraded_parent;
+                } else {
+                    break;
+                }
+            } else {
+                break;
+            }
+        }
+    }
+    // TODO: 实现kobject_get_path
+    // https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject.c#139
+    pub fn kobject_get_path(kobj: &Arc<dyn KObject>) -> String {
+        log::debug!("kobject_get_path() kobj:{:?}", kobj.name());
+        let length = Self::get_kobj_path_length(kobj);
+        let path: &mut [u8] = &mut vec![0; length];
+        Self::fill_kobj_path(kobj, path, length);
+        let path_string = String::from_utf8(path.to_vec()).unwrap();
+        return path_string;
+    }
 }
 
 /// 动态创建的kobject对象的ktype

+ 32 - 2
kernel/src/driver/base/kset.rs

@@ -6,8 +6,11 @@ use alloc::{
 
 use core::hash::Hash;
 
-use super::kobject::{
-    DynamicKObjKType, KObjType, KObject, KObjectManager, KObjectState, LockedKObjectState,
+use super::{
+    kobject::{
+        DynamicKObjKType, KObjType, KObject, KObjectManager, KObjectState, LockedKObjectState,
+    },
+    uevent::KobjUeventEnv,
 };
 use crate::{
     filesystem::kernfs::KernFSInode,
@@ -26,6 +29,8 @@ pub struct KSet {
     /// 与父节点有关的一些信息
     parent_data: RwLock<KSetParentData>,
     self_ref: Weak<KSet>,
+    /// kset用于发送uevent的操作函数集。kset能够发送它所包含的各种子kobj、孙kobj的消息,即kobj或其父辈、爷爷辈,都可以发送消息;优先父辈,然后是爷爷辈,以此类推
+    pub uevent_ops: Option<Arc<dyn KSetUeventOps>>,
 }
 
 impl Hash for KSet {
@@ -51,6 +56,7 @@ impl KSet {
             kobj_state: LockedKObjectState::new(None),
             parent_data: RwLock::new(KSetParentData::new(None, None)),
             self_ref: Weak::default(),
+            uevent_ops: Some(Arc::new(KSetUeventOpsDefault)),
         };
 
         let r = Arc::new(r);
@@ -91,6 +97,7 @@ impl KSet {
     pub fn register(&self, join_kset: Option<Arc<KSet>>) -> Result<(), SystemError> {
         return KObjectManager::add_kobj(self.self_ref.upgrade().unwrap(), join_kset);
         // todo: 引入uevent之后,发送uevent
+        // kobject_uevent();
     }
 
     /// 注销一个kset
@@ -232,3 +239,26 @@ impl InnerKSet {
         }
     }
 }
+//https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/kobject.h#137
+use core::fmt::Debug;
+pub trait KSetUeventOps: Debug + Send + Sync {
+    fn filter(&self) -> Option<i32>;
+    fn uevent_name(&self) -> String;
+    fn uevent(&self, env: &KobjUeventEnv) -> i32;
+}
+#[derive(Debug)]
+pub struct KSetUeventOpsDefault;
+
+impl KSetUeventOps for KSetUeventOpsDefault {
+    fn filter(&self) -> Option<i32> {
+        Some(0)
+    }
+
+    fn uevent_name(&self) -> String {
+        String::new()
+    }
+
+    fn uevent(&self, env: &KobjUeventEnv) -> i32 {
+        0
+    }
+}

+ 1 - 0
kernel/src/driver/base/mod.rs

@@ -12,3 +12,4 @@ pub mod map;
 pub mod platform;
 pub mod subsys;
 pub mod swnode;
+pub mod uevent;

+ 504 - 0
kernel/src/driver/base/uevent/kobject_uevent.rs

@@ -0,0 +1,504 @@
+// https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject_uevent.c
+use super::KObject;
+use super::KobjUeventEnv;
+use super::KobjectAction;
+use super::{UEVENT_BUFFER_SIZE, UEVENT_NUM_ENVP};
+use crate::driver::base::kobject::{KObjectManager, KObjectState};
+use crate::init::initcall::INITCALL_POSTCORE;
+use crate::libs::mutex::Mutex;
+use crate::libs::rwlock::RwLock;
+use crate::net::socket::netlink::af_netlink::netlink_has_listeners;
+use crate::net::socket::netlink::af_netlink::NetlinkSocket;
+use crate::net::socket::netlink::af_netlink::{netlink_broadcast, NetlinkSock};
+use crate::net::socket::netlink::netlink::{
+    netlink_kernel_create, NetlinkKernelCfg, NETLINK_KOBJECT_UEVENT, NL_CFG_F_NONROOT_RECV,
+};
+use crate::net::socket::netlink::skbuff::SkBuff;
+use alloc::boxed::Box;
+use alloc::collections::LinkedList;
+use alloc::string::{String, ToString};
+use alloc::sync::Arc;
+use alloc::vec::Vec;
+use core::fmt::Write;
+use num::Zero;
+use system_error::SystemError;
+use unified_init::macros::unified_init;
+// Global uevent sequence number (counterpart of `uevent_seqnum` in Linux).
+// NOTE(review): this is an immutable `static`; the only use in this file computes
+// `UEVENT_SEQNUM + 1` without storing it back, so every event carries the same SEQNUM.
+// An atomic counter is presumably what is wanted here — confirm.
+pub static UEVENT_SEQNUM: u64 = 0;
+// #ifdef CONFIG_UEVENT_HELPER
+// char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
+// #endif
+
+/// One kernel-side netlink socket through which uevents are broadcast.
+struct UeventSock {
+    /// The wrapped netlink socket.
+    inner: NetlinkSock,
+}
+
+impl UeventSock {
+    /// Wraps an already created netlink socket.
+    pub fn new(inner: NetlinkSock) -> Self {
+        Self { inner }
+    }
+}
+
+// Holds every netlink socket used to send uevent messages. These sockets carry device
+// event notifications from the kernel to user space.
+// Whenever a uevent has to be sent, the list is walked and the message is sent through
+// each socket in it.
+// The global list is protected by a Mutex.
+lazy_static::lazy_static! {
+    static ref UEVENT_SOCK_LIST: Mutex<LinkedList<UeventSock>> = Mutex::new(LinkedList::new());
+}
+// Callback meant to run when a uevent message is received.
+// Currently a stub: the body only holds the commented-out Linux call it should mirror.
+fn uevent_net_rcv() {
+    // netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
+/// Runs during kernel initialisation, before device initialisation
+/// (registered at the `INITCALL_POSTCORE` level).
+#[unified_init(INITCALL_POSTCORE)]
+fn kobejct_uevent_init() -> Result<(), SystemError> {
+    // todo: net namespace
+    uevent_net_init()
+}
+// TODO: add the `net` parameter and the related handling once net namespaces exist.
+// This has to be called at boot even when no network-namespace isolation is in use.
+// Once net namespaces are supported it must be called whenever a namespace is initialised.
+/// Initialises uevent delivery for one net namespace: creates the kernel netlink socket
+/// for `NETLINK_KOBJECT_UEVENT` and appends it to `UEVENT_SOCK_LIST`.
+///
+/// NOTE(review): a failure of `netlink_kernel_create` panics through `unwrap()` instead
+/// of being returned to the initcall machinery.
+fn uevent_net_init() -> Result<(), SystemError> {
+    // Create the kernel-side netlink socket.
+    let sock = netlink_kernel_create(
+        NETLINK_KOBJECT_UEVENT,
+        Some(NetlinkKernelCfg {
+            groups: 1,
+            flags: NL_CFG_F_NONROOT_RECV,
+            ..Default::default()
+        }),
+    )
+    .unwrap();
+
+    // todo: net namespace
+    // net.uevent_sock = ue_sk;
+
+    // Each net namespace contributes one uevent socket to the list.
+    UEVENT_SOCK_LIST.lock().push_back(UeventSock::new(sock));
+    log::info!("uevent_net_init finish");
+    Ok(())
+}
+
+// Cleanup for system shutdown: empties `UEVENT_SOCK_LIST`, dropping every uevent socket
+// stored in it.
+fn uevent_net_exit() {
+    // Clear the list.
+    UEVENT_SOCK_LIST.lock().clear();
+}
+
+// /* This lock protects uevent_seqnum and uevent_sock_list */
+// static DEFINE_MUTEX(uevent_sock_mutex);
+
+// TODO: to be adjusted
+pub const BUFFERSIZE: usize = 666;
+
+/*
+ kobject_uevent_env,以envp为环境变量,上报一个指定action的uevent。环境变量的作用是为执行用户空间程序指定运行环境。具体动作如下:
+
+    查找kobj本身或者其parent是否从属于某个kset,如果不是,则报错返回(注2:由此可以说明,如果一个kobject没有加入kset,是不允许上报uevent的)
+    查看kobj->uevent_suppress是否设置,如果设置,则忽略所有的uevent上报并返回(注3:由此可知,可以通过Kobject的uevent_suppress标志,管控Kobject的uevent的上报)
+    如果所属的kset有kset->filter函数,则调用该函数,过滤此次上报(注4:这佐证了3.2小节有关filter接口的说明,kset可以通过filter接口过滤不希望上报的event,从而达到整体的管理效果)
+    判断所属的kset是否有合法的名称(称作subsystem,和前期的内核版本有区别),否则不允许上报uevent
+    分配一个用于此次上报的、存储环境变量的buffer(结果保存在env指针中),并获得该Kobject在sysfs中路径信息(用户空间软件需要依据该路径信息在sysfs中访问它)
+    调用add_uevent_var接口(下面会介绍),将Action、路径信息、subsystem等信息,添加到env指针中
+    如果传入的envp不空,则解析传入的环境变量中,同样调用add_uevent_var接口,添加到env指针中
+    如果所属的kset存在kset->uevent接口,调用该接口,添加kset统一的环境变量到env指针
+    根据ACTION的类型,设置kobj->state_add_uevent_sent和kobj->state_remove_uevent_sent变量,以记录正确的状态
+    调用add_uevent_var接口,添加格式为"SEQNUM=%llu”的序列号
+    如果定义了"CONFIG_NET”,则使用netlink发送该uevent
+    以uevent_helper、subsystem以及添加了标准环境变量(HOME=/,PATH=/sbin:/bin:/usr/sbin:/usr/bin)的env指针为参数,调用kmod模块提供的call_usermodehelper函数,上报uevent。
+    其中uevent_helper的内容是由内核配置项CONFIG_UEVENT_HELPER_PATH(位于./drivers/base/Kconfig)决定的(可参考lib/kobject_uevent.c, line 32),该配置项指定了一个用户空间程序(或者脚本),用于解析上报的uevent,例如"/sbin/hotplug”。
+    call_usermodehelper的作用,就是fork一个进程,以uevent为参数,执行uevent_helper。
+
+kobject_uevent,和kobject_uevent_env功能一样,只是没有指定任何的环境变量。
+
+add_uevent_var,以格式化字符的形式(类似printf、printk等),将环境变量copy到env指针中。
+
+kobject_action_type,将enum kobject_action类型的Action,转换为字符串
+*/
+
+// kobject_uevent -> kobject_uevent_env
+/// Reports `action` for `kobj` without any caller-supplied environment variables.
+///
+/// Behaves like [`kobject_uevent_env`] called with `envp_ext = None`; the numeric result
+/// of a successful call is discarded.
+pub fn kobject_uevent(kobj: Arc<dyn KObject>, action: KobjectAction) -> Result<(), SystemError> {
+    kobject_uevent_env(kobj, action, None).map(|_| ())
+}
+/// Builds the uevent environment for `kobj` and broadcasts it over netlink.
+///
+/// The steps follow `kobject_uevent_env()` in Linux `lib/kobject_uevent.c`:
+/// 1. Starting at `kobj`, walk up the parents until a kobject that belongs to a kset is
+///    found (nearest ancestor first). A kobject with no kset in its ancestry may not
+///    report uevents.
+/// 2. Drop the event when `kobj` has `UEVENT_SUPPRESS` set or the kset's `filter`
+///    returns `Some(0)`.
+/// 3. Resolve the subsystem name (the kset's `uevent_name`, else `kobj.name()`); an empty
+///    name drops the event.
+/// 4. Add `ACTION`, `DEVPATH`, `SUBSYSTEM`, every entry of `envp_ext`, then `SEQNUM`.
+/// 5. Broadcast through `kobject_uevent_net_broadcast`.
+///
+/// # Parameters
+/// - `kobj`: the kobject the event is about.
+/// - `action`: the action being reported.
+/// - `envp_ext`: optional extra `KEY=value` strings supplied by the caller.
+///
+/// # Returns
+/// - `Ok(0)` when the event was broadcast successfully or deliberately dropped.
+/// - `Ok(n)` with `n != 0` when the kset's `uevent` hook rejected the event or the
+///   broadcast reported an errno-style code.
+/// - `Err(EINVAL)` when no kset could be found, `Err(ENOENT)` when the kobject has no
+///   path, or the error returned by `add_uevent_var` when the environment is full.
+pub fn kobject_uevent_env(
+    kobj: Arc<dyn KObject>,
+    action: KobjectAction,
+    envp_ext: Option<Vec<String>>,
+) -> Result<i32, SystemError> {
+    log::info!("kobject_uevent_env: kobj: {:?}, action: {:?}", kobj, action);
+    // NOTE(review): `state` is only modified locally and never written back to `kobj`,
+    // so the "uevent sent" flags are not actually recorded on the kobject.
+    let mut state = KObjectState::empty();
+    let action_string: &'static str = match action {
+        KobjectAction::KOBJADD => "add",
+        KobjectAction::KOBJREMOVE => "remove",
+        KobjectAction::KOBJCHANGE => "change",
+        KobjectAction::KOBJMOVE => "move",
+        KobjectAction::KOBJONLINE => "online",
+        KobjectAction::KOBJOFFLINE => "offline",
+        KobjectAction::KOBJBIND => "bind",
+        KobjectAction::KOBJUNBIND => "unbind",
+    };
+    /*
+     * Mark "remove" event done regardless of result, for some subsystems
+     * do not want to re-trigger "remove" event via automatic cleanup.
+     */
+    if let KobjectAction::KOBJREMOVE = action {
+        log::info!("kobject_uevent_env: action: remove");
+        state.insert(KObjectState::REMOVE_UEVENT_SENT);
+    }
+
+    // Search the kset we belong to: start at `kobj` itself and climb while the current
+    // kobject has no kset. A missing or already dropped parent ends the walk instead of
+    // panicking.
+    let mut top_kobj: Arc<dyn KObject> = kobj.clone();
+    while top_kobj.kset().is_none() {
+        log::info!("kobject_uevent_env: top_kobj: {:?}", top_kobj);
+        let parent = top_kobj.parent().and_then(|weak| weak.upgrade());
+        match parent {
+            Some(parent) => top_kobj = parent,
+            None => break,
+        }
+    }
+    // A kobject that is not part of any kset (directly or through an ancestor) is not
+    // allowed to report uevents.
+    let kset = match top_kobj.kset() {
+        Some(kset) => kset,
+        None => {
+            log::info!("attempted to send uevent without kset!\n");
+            return Err(SystemError::EINVAL);
+        }
+    };
+
+    // Skip the event if uevent_suppress is set on the kobject.
+    if kobj.kobj_state().contains(KObjectState::UEVENT_SUPPRESS) {
+        log::info!("uevent_suppress caused the event to drop!");
+        return Ok(0);
+    }
+
+    // Skip the event if the kset's filter returns 0.
+    if let Some(uevent_ops) = kset.uevent_ops.as_ref() {
+        if uevent_ops.filter() == Some(0) {
+            log::info!("filter caused the event to drop!");
+            return Ok(0);
+        }
+    }
+
+    // Originating subsystem: the kset-provided name when it is non-empty, otherwise the
+    // kobject's own name. Without a valid name the event must not be reported.
+    let subsystem: String = kset
+        .uevent_ops
+        .as_ref()
+        .map(|uevent_ops| uevent_ops.uevent_name())
+        .filter(|name| !name.is_empty())
+        .unwrap_or_else(|| kobj.name());
+    if subsystem.is_empty() {
+        log::info!("unset subsystem caused the event to drop!");
+        return Ok(0);
+    }
+    log::info!("kobject_uevent_env: subsystem: {}", subsystem);
+
+    // Environment buffer for this event.
+    let mut env = Box::new(KobjUeventEnv {
+        argv: Vec::with_capacity(UEVENT_NUM_ENVP),
+        envp: Vec::with_capacity(UEVENT_NUM_ENVP),
+        envp_idx: 0,
+        buf: vec![0; UEVENT_BUFFER_SIZE],
+        buflen: 0,
+    });
+
+    // Complete object path of the kobject.
+    let devpath: String = KObjectManager::kobject_get_path(&kobj);
+    log::info!("kobject_uevent_env: devpath: {}", devpath);
+    if devpath.is_empty() {
+        log::warn!("kobject_uevent_env: devpath is empty");
+        return Err(SystemError::ENOENT);
+    }
+
+    // Default keys. A full environment is propagated as an error instead of a panic.
+    add_uevent_var(&mut env, "ACTION=%s", action_string)?;
+    add_uevent_var(&mut env, "DEVPATH=%s", &devpath)?;
+    add_uevent_var(&mut env, "SUBSYSTEM=%s", &subsystem)?;
+
+    /* keys passed in from the caller */
+    if let Some(env_ext) = envp_ext {
+        for var in env_ext {
+            add_uevent_var(&mut env, "%s", &var)?;
+        }
+    }
+
+    // Let the kset add its own data; the hook is called exactly once and a non-zero
+    // result drops the event.
+    if let Some(uevent_ops) = kset.uevent_ops.as_ref() {
+        let hook_result = uevent_ops.uevent(&env);
+        if !hook_result.is_zero() {
+            log::info!("kset uevent caused the event to drop!");
+            return Ok(hook_result);
+        }
+    }
+
+    match action {
+        KobjectAction::KOBJADD => {
+            state.insert(KObjectState::ADD_UEVENT_SENT);
+        }
+        KobjectAction::KOBJUNBIND => {
+            zap_modalias_env(&mut env);
+        }
+        _ => {}
+    }
+
+    //mutex_lock(&uevent_sock_mutex);
+    /* we will send an event, so request a new sequence number */
+    // NOTE(review): UEVENT_SEQNUM is never incremented, so this is always the same value.
+    add_uevent_var(&mut env, "SEQNUM=%llu", &(UEVENT_SEQNUM + 1).to_string())?;
+    let retval = kobject_uevent_net_broadcast(kobj, &env, action_string, &devpath);
+    //mutex_unlock(&uevent_sock_mutex);
+
+    #[cfg(feature = "UEVENT_HELPER")]
+    fn handle_uevent_helper() {
+        // TODO
+        // Compiled in when the `UEVENT_HELPER` feature is enabled.
+        // Runs the user-space program that handles uevents, usually a hotplug helper
+        // such as mdev or udevd.
+        //     /* call uevent_helper, usually only enabled during early boot */
+        //     if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
+        //         struct subprocess_info *info;
+
+        //         retval = add_uevent_var(env, "HOME=/");
+        //         if (retval)
+        //             goto exit;
+        //         retval = add_uevent_var(env,
+        //                     "PATH=/sbin:/bin:/usr/sbin:/usr/bin");
+        //         if (retval)
+        //             goto exit;
+        //         retval = init_uevent_argv(env, subsystem);
+        //         if (retval)
+        //             goto exit;
+
+        //         retval = -ENOMEM;
+        //         info = call_usermodehelper_setup(env->argv[0], env->argv,
+        //                          env->envp, GFP_KERNEL,
+        //                          NULL, cleanup_uevent_env, env);
+        //         if (info) {
+        //             retval = call_usermodehelper_exec(info, UMH_NO_WAIT);
+        //             env = NULL;    /* freed by cleanup_uevent_env */
+        //         }
+        //     }
+    }
+    #[cfg(not(feature = "UEVENT_HELPER"))]
+    fn handle_uevent_helper() {
+        // Compiled in when the `UEVENT_HELPER` feature is disabled: nothing to do.
+    }
+    handle_uevent_helper();
+    log::info!("kobject_uevent_env: retval: {}", retval);
+    Ok(retval)
+}
+
+/// Appends one entry to the uevent environment.
+///
+/// `format` is a printf-style template with at most one conversion (`%s`, `%d`, `%llu`,
+/// ...). The conversion is replaced by `args`, so `("ACTION=%s", "add")` yields
+/// `ACTION=add`. When `format` contains no conversion, `args` is appended to it directly.
+///
+/// The resulting string is pushed onto `env.envp` and also copied, NUL-terminated, into
+/// `env.buf` at offset `env.buflen`.
+///
+/// # Returns
+/// - `Ok(0)` on success.
+/// - `Err(ENOMEM)` when `UEVENT_NUM_ENVP` entries are already stored or the entry does
+///   not fit in the remaining `UEVENT_BUFFER_SIZE` bytes.
+pub fn add_uevent_var(
+    env: &mut Box<KobjUeventEnv>,
+    format: &str,
+    args: &str,
+) -> Result<i32, SystemError> {
+    log::info!("add_uevent_var: format: {}, args: {}", format, args);
+    if env.envp_idx >= UEVENT_NUM_ENVP {
+        log::info!("add_uevent_var: too many keys");
+        return Err(SystemError::ENOMEM);
+    }
+
+    // Split the template around its conversion specification: everything before '%',
+    // and everything after the (optional) length modifiers plus the conversion character.
+    let (prefix, suffix) = match format.find('%') {
+        Some(pos) => {
+            let spec = &format[pos + 1..];
+            let modifiers = spec
+                .bytes()
+                .take_while(|b| matches!(b, b'l' | b'h' | b'z'))
+                .count();
+            let conversion = &spec[modifiers..];
+            let conversion_len = conversion.chars().next().map_or(0, |c| c.len_utf8());
+            (&format[..pos], &conversion[conversion_len..])
+        }
+        None => (format, ""),
+    };
+
+    let mut buffer = String::with_capacity(prefix.len() + args.len() + suffix.len());
+    write!(&mut buffer, "{}{}{}", prefix, args, suffix).map_err(|_| SystemError::ENOMEM)?;
+    let len = buffer.len();
+
+    // The entry needs `len` bytes plus the terminating NUL.
+    if len >= UEVENT_BUFFER_SIZE.saturating_sub(env.buflen) {
+        log::info!("add_uevent_var: buffer size too small");
+        return Err(SystemError::ENOMEM);
+    }
+
+    // Store the bytes at the current write offset. `buf` may be pre-filled with zeros or
+    // still be shorter than the offset, so grow it only when required.
+    let start = env.buflen;
+    let end = start + len + 1;
+    if env.buf.len() < end {
+        env.buf.resize(end, 0);
+    }
+    env.buf[start..start + len].copy_from_slice(buffer.as_bytes());
+    env.buf[start + len] = 0; // Null-terminate the string
+    env.buflen = end;
+
+    // Add the string to envp
+    env.envp.push(buffer);
+    env.envp_idx += 1;
+
+    Ok(0)
+}
+
+// Removes every `MODALIAS=` variable from the uevent environment (used for "unbind"
+// events).
+//
+// The matching strings are removed from `envp`, `envp_idx` is set to the number of
+// remaining entries, and `buf` is re-packed so that its first `buflen` bytes again hold
+// exactly the remaining NUL-terminated strings. This assumes `envp_idx == envp.len()`
+// on entry, which is what `add_uevent_var` maintains.
+fn zap_modalias_env(env: &mut Box<KobjUeventEnv>) {
+    const MODALIAS_PREFIX: &str = "MODALIAS=";
+
+    // Plain reference to the struct so that `envp` and `buf` can be borrowed separately.
+    let env: &mut KobjUeventEnv = env;
+
+    let before = env.envp.len();
+    env.envp.retain(|var| !var.starts_with(MODALIAS_PREFIX));
+    if env.envp.len() == before {
+        // Nothing was removed; leave the buffer untouched.
+        return;
+    }
+    env.envp_idx = env.envp.len();
+
+    // Re-pack the byte buffer from the surviving variables.
+    let old_buflen = env.buflen;
+    let mut offset = 0;
+    for var in env.envp.iter() {
+        let end = offset + var.len() + 1;
+        if env.buf.len() < end {
+            env.buf.resize(end, 0);
+        }
+        env.buf[offset..end - 1].copy_from_slice(var.as_bytes());
+        env.buf[end - 1] = 0;
+        offset = end;
+    }
+
+    // Wipe the bytes that belonged to the removed variables.
+    let stale_end = old_buflen.min(env.buf.len());
+    if offset < stale_end {
+        env.buf[offset..stale_end].fill(0);
+    }
+    env.buflen = offset;
+}
+
+// Broadcasts a uevent over the network (netlink) side.
+// https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject_uevent.c#381
+//
+// Namespace handling is not implemented yet, so `kobj` is unused and the event is always
+// sent through the untagged path. Returns whatever `uevent_net_broadcast_untagged` returns.
+pub fn kobject_uevent_net_broadcast(
+    kobj: Arc<dyn KObject>,
+    env: &KobjUeventEnv,
+    action_string: &str,
+    devpath: &str,
+) -> i32 {
+    // let net:Net = None;
+    // let mut ops = kobj_ns_ops(kobj);
+
+    // if (!ops && kobj.kset().is_some()) {
+    // 	let ksobj:KObject = &kobj.kset().kobj();
+
+    // 	if (ksobj.parent() != NULL){
+    //         ops = kobj_ns_ops(ksobj.parent());
+    //     }
+
+    // }
+    // TODO: `net` struct?
+    // https://code.dragonos.org.cn/xref/linux-6.1.9/include/net/net_namespace.h#60
+    /* kobjects currently only carry network namespace tags and they
+     * are the only tag relevant here since we want to decide which
+     * network namespaces to broadcast the uevent into.
+     */
+    // if (ops && ops.netlink_ns() && kobj.ktype().namespace())
+    // 	if (ops.type() == KOBJ_NS_TYPE_NET)
+    // 		net = kobj.ktype().namespace(kobj);
+    // With a network namespace, broadcast a tagged uevent; without one, an untagged uevent.
+    // if !net.is_none() {
+    //     ret = uevent_net_broadcast_tagged(net.unwrap(), env, action_string, devpath);
+    // } else {
+    let ret = uevent_net_broadcast_untagged(env, action_string, devpath);
+    // }
+    log::info!("kobject_uevent_net_broadcast finish. ret: {}", ret);
+    ret
+}
+
+/// Placeholder for broadcasting a namespace-tagged uevent.
+///
+/// Not implemented yet: all parameters are ignored and 0 is always returned.
+pub fn uevent_net_broadcast_tagged(
+    sk: &dyn NetlinkSocket,
+    env: &KobjUeventEnv,
+    action_string: &str,
+    devpath: &str,
+) -> i32 {
+    let ret = 0;
+    ret
+}
+
+/// Allocates an skb (socket buffer) for a uevent message.
+///
+/// NOTE(review): currently a stub — `env`, `action_string` and `devpath` are not used,
+/// and the returned buffer is just a fresh `SkBuff::new()` with no uevent payload.
+pub fn alloc_uevent_skb<'a>(
+    env: &'a KobjUeventEnv,
+    action_string: &'a str,
+    devpath: &'a str,
+) -> Arc<RwLock<SkBuff>> {
+    let skb = Arc::new(RwLock::new(SkBuff::new()));
+    skb
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject_uevent.c#309
+/// Broadcasts an untagged uevent message through every socket in `UEVENT_SOCK_LIST`.
+///
+/// Sockets without listeners are skipped. The skb is allocated lazily, the first time a
+/// socket with listeners is found; if that allocation yields an empty skb the socket is
+/// skipped and the result stays at the `ENOMEM` errno value.
+///
+/// # Returns
+/// 0 on success (also when the last broadcast failed with `ENOBUFS` or `ESRCH`, which are
+/// left to user space), otherwise the errno value of the last failing step.
+pub fn uevent_net_broadcast_untagged(
+    env: &KobjUeventEnv,
+    action_string: &str,
+    devpath: &str,
+) -> i32 {
+    log::info!(
+        "uevent_net_broadcast_untagged: action_string: {}, devpath: {}",
+        action_string,
+        devpath
+    );
+    let mut retval = 0;
+    let mut skb = Arc::new(RwLock::new(SkBuff::new()));
+
+    // Lock UEVENT_SOCK_LIST and walk it.
+    let ue_sk_list = UEVENT_SOCK_LIST.lock();
+    for ue_sk in ue_sk_list.iter() {
+        // Skip sockets nobody is listening on.
+        if netlink_has_listeners(&ue_sk.inner, 1) == 0 {
+            log::info!("uevent_net_broadcast_untagged: no listeners");
+            continue;
+        }
+        // Allocate the skb on first use.
+        if skb.read().is_empty() {
+            retval = SystemError::ENOMEM.to_posix_errno();
+            skb = alloc_uevent_skb(env, action_string, devpath);
+            if skb.read().is_empty() {
+                // Only report the failure once the allocation has actually failed.
+                log::info!("uevent_net_broadcast_untagged: alloc_uevent_skb failed");
+                continue;
+            }
+        }
+        log::info!("next is netlink_broadcast");
+        let netlink_socket: Arc<dyn NetlinkSocket> = Arc::new(ue_sk.inner.clone());
+        retval = match netlink_broadcast(&netlink_socket, Arc::clone(&skb), 0, 1, 1) {
+            Ok(_) => 0,
+            Err(err) => err.to_posix_errno(),
+        };
+        log::info!("finished netlink_broadcast");
+        // ENOBUFS should be handled in userspace
+        if retval == SystemError::ENOBUFS.to_posix_errno()
+            || retval == SystemError::ESRCH.to_posix_errno()
+        {
+            retval = 0;
+        }
+    }
+    // consume_skb(skb);
+    retval
+}

+ 102 - 0
kernel/src/driver/base/uevent/mod.rs

@@ -0,0 +1,102 @@
+// include/linux/kobject.h
+// lib/kobject_uevent.c
+
+/*
+    UEVENT_HELPER_PATH_LEN
+    UEVENT_NUM_ENVP
+    _KOBJECT_H_
+
+Variable
+
+    __randomize_layout
+
+Enum
+
+    kobject_action
+
+Struct
+
+    kobj_attribute
+    kobj_type
+    kobj_uevent_env
+    kobject
+    kset
+    kset_uevent_ops
+
+Function
+
+    get_ktype
+    kobject_name
+    kset_get
+    kset_put
+    to_kset
+*/
+use crate::driver::base::kobject::KObject;
+use alloc::string::String;
+use alloc::vec::Vec;
+
+pub mod kobject_uevent;
+
+// https://code.dragonos.org.cn/xref/linux-6.1.9/lib/kobject_uevent.c?fi=kobject_uevent#457
+// kobject_action
+/// The action a uevent reports. `kobject_uevent_env` maps each variant to its lowercase
+/// action string ("add", "remove", "change", ...).
+#[derive(Debug)]
+pub enum KobjectAction {
+    /// A kobject (or the higher-level structure it belongs to) was added.
+    KOBJADD,
+    /// A kobject (or the higher-level structure it belongs to) was removed.
+    KOBJREMOVE,
+    /// The state or content of a kobject (or higher-level structure) changed. Also meant
+    /// for driver events that fit none of the other actions, or custom events, carrying
+    /// the corresponding parameters.
+    KOBJCHANGE,
+    /// A kobject (or higher-level structure) was renamed or got a new parent, i.e. the
+    /// directory structure in sysfs changed.
+    KOBJMOVE,
+    /// A kobject (or higher-level structure) came online (was enabled).
+    KOBJONLINE,
+    /// A kobject (or higher-level structure) went offline (was disabled).
+    KOBJOFFLINE,
+    /// Reported with the action string "bind".
+    KOBJBIND,
+    /// Reported with the action string "unbind".
+    KOBJUNBIND,
+}
+
+/*
+    @parameters:
+
+    envp,指针数组,用于保存每个环境变量的地址,最多可支持的环境变量数量为UEVENT_NUM_ENVP。
+
+    envp_idx,用于访问环境变量指针数组的index。
+
+    buf,保存环境变量的buffer,最大为UEVENT_BUFFER_SIZE。
+
+    buflen,访问buf的变量。
+
+*/
+
+//https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/kobject.h#31
+
+/// Maximum number of environment variables one uevent may carry.
+pub const UEVENT_NUM_ENVP: usize = 64;
+/// Maximum size, in bytes, of the buffer that stores the environment variables.
+pub const UEVENT_BUFFER_SIZE: usize = 2048;
+/// Presumably the maximum length of the uevent helper path, as in Linux — not used in
+/// the code shown here; confirm.
+pub const UEVENT_HELPER_PATH_LEN: usize = 256;
+
+/// Represents the environment for handling kernel object uevents,
+/// i.e. one uevent that is about to be sent.
+#[derive(Debug)]
+pub struct KobjUeventEnv {
+    // NOTE(review): presumably the argument vector for the user-space uevent helper;
+    // it is only allocated, never filled, in the code shown here.
+    argv: Vec<String>,
+    // The environment variables, at most UEVENT_NUM_ENVP of them.
+    envp: Vec<String>,
+    // Index used to access `envp` (the number of variables stored so far).
+    envp_idx: usize,
+    // Buffer holding the environment variables, at most UEVENT_BUFFER_SIZE bytes.
+    buf: Vec<u8>,
+    // Offset used to access `buf` (the number of bytes used so far).
+    buflen: usize,
+}
+
+// kset_uevent_ops是为kset量身订做的一个数据结构,里面包含filter和uevent两个回调函数,用处如下:
+/*
+    filter,当任何Kobject需要上报uevent时,它所属的kset可以通过该接口过滤,阻止不希望上报的event,从而达到从整体上管理的目的。
+
+    name,该接口可以返回kset的名称。如果一个kset没有合法的名称,则其下的所有Kobject将不允许上报uevent
+
+    uevent,当任何Kobject需要上报uevent时,它所属的kset可以通过该接口统一为这些event添加环境变量。因为很多时候上报uevent时的环境变量都是相同的,因此可以由kset统一处理,就不需要让每个Kobject独自添加了。
+
+*/

+ 17 - 59
kernel/src/driver/net/e1000e/e1000e_driver.rs

@@ -8,7 +8,9 @@ use crate::{
             device::{bus::Bus, driver::Driver, Device, DeviceCommonData, DeviceType, IdTable},
             kobject::{KObjType, KObject, KObjectCommonData, KObjectState, LockedKObjectState},
         },
-        net::{register_netdevice, NetDeivceState, NetDevice, NetDeviceCommonData, Operstate},
+        net::{
+            register_netdevice, Iface, IfaceCommon, NetDeivceState, NetDeviceCommonData, Operstate,
+        },
     },
     libs::{
         rwlock::{RwLockReadGuard, RwLockWriteGuard},
@@ -27,11 +29,8 @@ use core::{
     ops::{Deref, DerefMut},
 };
 use log::info;
-use smoltcp::{
-    phy,
-    wire::{self, HardwareAddress},
-};
-use system_error::SystemError;
+use smoltcp::{phy, wire::HardwareAddress};
+// use system_error::SystemError;
 
 use super::e1000e::{E1000EBuffer, E1000EDevice};
 
@@ -78,12 +77,12 @@ impl Debug for E1000EDriverWrapper {
     }
 }
 
-#[cast_to([sync] NetDevice)]
+#[cast_to([sync] Iface)]
 #[cast_to([sync] Device)]
+#[derive(Debug)]
 pub struct E1000EInterface {
     driver: E1000EDriverWrapper,
-    iface_id: usize,
-    iface: SpinLock<smoltcp::iface::Interface>,
+    common: IfaceCommon,
     name: String,
     inner: SpinLock<InnerE1000EInterface>,
     locked_kobj_state: LockedKObjectState,
@@ -201,11 +200,9 @@ impl E1000EInterface {
         let iface =
             smoltcp::iface::Interface::new(iface_config, &mut driver, Instant::now().into());
 
-        let driver: E1000EDriverWrapper = E1000EDriverWrapper(UnsafeCell::new(driver));
         let result = Arc::new(E1000EInterface {
-            driver,
-            iface_id,
-            iface: SpinLock::new(iface),
+            driver: E1000EDriverWrapper(UnsafeCell::new(driver)),
+            common: IfaceCommon::new(iface_id, iface),
             name: format!("eth{}", iface_id),
             inner: SpinLock::new(InnerE1000EInterface {
                 netdevice_common: NetDeviceCommonData::default(),
@@ -223,16 +220,6 @@ impl E1000EInterface {
     }
 }
 
-impl Debug for E1000EInterface {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("E1000EInterface")
-            .field("iface_id", &self.iface_id)
-            .field("iface", &"smoltcp::iface::Interface")
-            .field("name", &self.name)
-            .finish()
-    }
-}
-
 impl Device for E1000EInterface {
     fn dev_type(&self) -> DeviceType {
         DeviceType::Net
@@ -302,52 +289,23 @@ impl Device for E1000EInterface {
     }
 }
 
-impl NetDevice for E1000EInterface {
+impl Iface for E1000EInterface {
+    fn common(&self) -> &IfaceCommon {
+        return &self.common;
+    }
+
     fn mac(&self) -> smoltcp::wire::EthernetAddress {
         let mac = self.driver.inner.lock().mac_address();
         return smoltcp::wire::EthernetAddress::from_bytes(&mac);
     }
 
-    #[inline]
-    fn nic_id(&self) -> usize {
-        return self.iface_id;
-    }
-
     #[inline]
     fn iface_name(&self) -> String {
         return self.name.clone();
     }
 
-    fn update_ip_addrs(&self, ip_addrs: &[wire::IpCidr]) -> Result<(), SystemError> {
-        if ip_addrs.len() != 1 {
-            return Err(SystemError::EINVAL);
-        }
-
-        self.iface.lock().update_ip_addrs(|addrs| {
-            let dest = addrs.iter_mut().next();
-
-            if let Some(dest) = dest {
-                *dest = ip_addrs[0];
-            } else {
-                addrs.push(ip_addrs[0]).expect("Push ipCidr failed: full");
-            }
-        });
-        return Ok(());
-    }
-
-    fn poll(&self, sockets: &mut smoltcp::iface::SocketSet) -> Result<(), SystemError> {
-        let timestamp: smoltcp::time::Instant = Instant::now().into();
-        let mut guard = self.iface.lock();
-        let poll_res = guard.poll(timestamp, self.driver.force_get_mut(), sockets);
-        if poll_res {
-            return Ok(());
-        }
-        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
-    }
-
-    #[inline(always)]
-    fn inner_iface(&self) -> &SpinLock<smoltcp::iface::Interface> {
-        return &self.iface;
+    fn poll(&self) {
+        self.common.poll(self.driver.force_get_mut())
     }
 
     fn addr_assign_type(&self) -> u8 {

+ 5 - 2
kernel/src/driver/net/irq_handle.rs

@@ -7,7 +7,8 @@ use crate::{
         irqdesc::{IrqHandler, IrqReturn},
         IrqNumber,
     },
-    net::net_core::poll_ifaces_try_lock_onetime,
+    // net::net_core::poll_ifaces_try_lock_onetime,
+    net::net_core::poll_ifaces,
 };
 
 /// 默认的网卡中断处理函数
@@ -21,7 +22,9 @@ impl IrqHandler for DefaultNetIrqHandler {
         _static_data: Option<&dyn IrqHandlerData>,
         _dynamic_data: Option<Arc<dyn IrqHandlerData>>,
     ) -> Result<IrqReturn, SystemError> {
-        poll_ifaces_try_lock_onetime().ok();
+        // poll_ifaces_try_lock_onetime().ok();
+        log::warn!("DefaultNetIrqHandler: poll_ifaces_try_lock_onetime -> poll_ifaces");
+        poll_ifaces();
         Ok(IrqReturn::Handled)
     }
 }

+ 42 - 88
kernel/src/driver/net/loopback.rs

@@ -28,7 +28,9 @@ use smoltcp::{
 use system_error::SystemError;
 use unified_init::macros::unified_init;
 
-use super::{register_netdevice, NetDeivceState, NetDevice, NetDeviceCommonData, Operstate};
+use super::{register_netdevice, NetDeivceState, NetDeviceCommonData, Operstate};
+
+use super::{Iface, IfaceCommon};
 
 const DEVICE_NAME: &str = "loopback";
 
@@ -81,6 +83,7 @@ impl phy::TxToken for LoopbackTxToken {
         let result = f(buffer.as_mut_slice());
         let mut device = self.driver.inner.lock();
         device.loopback_transmit(buffer);
+        // debug!("lo transmit!");
         result
     }
 }
@@ -112,7 +115,7 @@ impl Loopback {
         let buffer = self.queue.pop_front();
         match buffer {
             Some(buffer) => {
-                //debug!("lo receive:{:?}", buffer);
+                // debug!("lo receive:{:?}", buffer);
                 return buffer;
             }
             None => {
@@ -127,7 +130,7 @@ impl Loopback {
     /// - &mut self:自身可变引用
     /// - buffer:需要发送的数据包
     pub fn loopback_transmit(&mut self, buffer: Vec<u8>) {
-        //debug!("lo transmit!");
+        // debug!("lo transmit:{:?}", buffer);
         self.queue.push_back(buffer)
     }
 }
@@ -136,6 +139,7 @@ impl Loopback {
 /// 为实现获得不可变引用的Interface的内部可变性,故为Driver提供UnsafeCell包裹器
 ///
 /// 参考virtio_net.rs
+#[derive(Debug)]
 struct LoopbackDriverWapper(UnsafeCell<LoopbackDriver>);
 unsafe impl Send for LoopbackDriverWapper {}
 unsafe impl Sync for LoopbackDriverWapper {}
@@ -214,8 +218,10 @@ impl phy::Device for LoopbackDriver {
         let buffer = self.inner.lock().loopback_receive();
         //receive队列为为空,返回NONE值以通知上层没有可以receive的包
         if buffer.is_empty() {
+            // log::debug!("lo receive none!");
             return Option::None;
         }
+        // log::debug!("lo receive!");
         let rx = LoopbackRxToken { buffer };
         let tx = LoopbackTxToken {
             driver: self.clone(),
@@ -232,6 +238,7 @@ impl phy::Device for LoopbackDriver {
     /// ## 返回值
     /// - 返回一个 `Some`,其中包含一个发送令牌,该令牌包含一个对自身的克隆引用
     fn transmit(&mut self, _timestamp: smoltcp::time::Instant) -> Option<Self::TxToken<'_>> {
+        // log::debug!("lo transmit!");
         Some(LoopbackTxToken {
             driver: self.clone(),
         })
@@ -240,13 +247,12 @@ impl phy::Device for LoopbackDriver {
 
 /// ## LoopbackInterface结构
 /// 封装驱动包裹器和iface,设置接口名称
-#[cast_to([sync] NetDevice)]
+#[cast_to([sync] Iface)]
 #[cast_to([sync] Device)]
+#[derive(Debug)]
 pub struct LoopbackInterface {
     driver: LoopbackDriverWapper,
-    iface_id: usize,
-    iface: SpinLock<smoltcp::iface::Interface>,
-    name: String,
+    common: IfaceCommon,
     inner: SpinLock<InnerLoopbackInterface>,
     locked_kobj_state: LockedKObjectState,
 }
@@ -280,16 +286,20 @@ impl LoopbackInterface {
             smoltcp::iface::Interface::new(iface_config, &mut driver, Instant::now().into());
         //设置网卡地址为127.0.0.1
         iface.update_ip_addrs(|ip_addrs| {
-            ip_addrs
-                .push(IpCidr::new(IpAddress::v4(127, 0, 0, 1), 8))
-                .unwrap();
+            for i in 1..=2 {
+                ip_addrs
+                    .push(IpCidr::new(IpAddress::v4(127, 0, 0, i), 8))
+                    .expect("Push ipCidr failed: full");
+            }
         });
-        let driver = LoopbackDriverWapper(UnsafeCell::new(driver));
+
+        // iface.routes_mut().update(|routes_map| {
+        //     routes_map[0].
+        // });
+
         Arc::new(LoopbackInterface {
-            driver,
-            iface_id,
-            iface: SpinLock::new(iface),
-            name: "lo".to_string(),
+            driver: LoopbackDriverWapper(UnsafeCell::new(driver)),
+            common: IfaceCommon::new(iface_id, iface),
             inner: SpinLock::new(InnerLoopbackInterface {
                 netdevice_common: NetDeviceCommonData::default(),
                 device_common: DeviceCommonData::default(),
@@ -304,16 +314,7 @@ impl LoopbackInterface {
     }
 }
 
-impl Debug for LoopbackInterface {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("LoopbackInterface")
-            .field("iface_id", &self.iface_id)
-            .field("iface", &"smtoltcp::iface::Interface")
-            .field("name", &self.name)
-            .finish()
-    }
-}
-
+//TODO: 向sysfs注册lo设备
 impl KObject for LoopbackInterface {
     fn as_any_ref(&self) -> &dyn core::any::Any {
         self
@@ -348,7 +349,7 @@ impl KObject for LoopbackInterface {
     }
 
     fn name(&self) -> String {
-        self.name.clone()
+        "lo".to_string()
     }
 
     fn set_name(&self, _name: String) {
@@ -441,72 +442,23 @@ impl Device for LoopbackInterface {
     }
 }
 
-impl NetDevice for LoopbackInterface {
-    /// 由于lo网卡设备不是实际的物理设备,其mac地址需要手动设置为一个默认值,这里默认为00:00:00:00:00
-    fn mac(&self) -> smoltcp::wire::EthernetAddress {
-        let mac = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
-        smoltcp::wire::EthernetAddress(mac)
+impl Iface for LoopbackInterface {
+    fn common(&self) -> &IfaceCommon {
+        &self.common
     }
 
-    #[inline]
-    fn nic_id(&self) -> usize {
-        self.iface_id
-    }
-
-    #[inline]
     fn iface_name(&self) -> String {
-        self.name.clone()
+        "lo".to_string()
     }
-    /// ## `update_ip_addrs` 用于更新接口的 IP 地址。
-    ///
-    /// ## 参数
-    /// - `&self` :自身引用
-    /// - `ip_addrs` :一个包含 `smoltcp::wire::IpCidr` 的切片,表示要设置的 IP 地址和子网掩码
-    ///
-    /// ## 返回值
-    /// - 如果 `ip_addrs` 的长度不为 1,返回 `Err(SystemError::EINVAL)`,表示输入参数无效
-    /// - 如果更新成功,返回 `Ok(())`
-    fn update_ip_addrs(
-        &self,
-        ip_addrs: &[smoltcp::wire::IpCidr],
-    ) -> Result<(), system_error::SystemError> {
-        if ip_addrs.len() != 1 {
-            return Err(SystemError::EINVAL);
-        }
-
-        self.iface.lock().update_ip_addrs(|addrs| {
-            let dest = addrs.iter_mut().next();
 
-            if let Some(dest) = dest {
-                *dest = ip_addrs[0];
-            } else {
-                addrs.push(ip_addrs[0]).expect("Push ipCidr failed: full");
-            }
-        });
-        return Ok(());
-    }
-    /// ## `poll` 用于轮询接口的状态。
-    ///
-    /// ## 参数
-    /// - `&self` :自身引用
-    /// - `sockets` :一个可变引用到 `smoltcp::iface::SocketSet`,表示要轮询的套接字集
-    ///
-    /// ## 返回值
-    /// - 如果轮询成功,返回 `Ok(())`
-    /// - 如果轮询失败,返回 `Err(SystemError::EAGAIN_OR_EWOULDBLOCK)`,表示需要再次尝试或者操作会阻塞
-    fn poll(&self, sockets: &mut smoltcp::iface::SocketSet) -> Result<(), SystemError> {
-        let timestamp: smoltcp::time::Instant = Instant::now().into();
-        let mut guard = self.iface.lock();
-        let poll_res = guard.poll(timestamp, self.driver.force_get_mut(), sockets);
-        if poll_res {
-            return Ok(());
-        }
-        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+    /// 由于lo网卡设备不是实际的物理设备,其mac地址需要手动设置为一个默认值,这里默认为00:00:00:00:00
+    fn mac(&self) -> smoltcp::wire::EthernetAddress {
+        let mac = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00];
+        smoltcp::wire::EthernetAddress(mac)
     }
 
-    #[inline(always)]
-    fn inner_iface(&self) -> &SpinLock<smoltcp::iface::Interface> {
-        return &self.iface;
+    fn poll(&self) {
+        self.common.poll(self.driver.force_get_mut())
     }
 
     fn addr_assign_type(&self) -> u8 {
@@ -538,7 +490,7 @@ impl NetDevice for LoopbackInterface {
 pub fn loopback_probe() { // Probe entry for the lo NIC: it only runs loopback_driver_init().
     loopback_driver_init();
 }
-/// ## lo网卡设备初始化函数
+/// # lo网卡设备初始化函数
 /// 创建驱动和iface,初始化一个lo网卡,添加到全局NET_DEVICES中
 pub fn loopback_driver_init() {
     let driver = LoopbackDriver::new();
@@ -548,14 +500,16 @@ pub fn loopback_driver_init() {
 
     NET_DEVICES
         .write_irqsave()
-        .insert(iface.iface_id, iface.clone());
+        .insert(iface.nic_id(), iface.clone());
 
     register_netdevice(iface.clone()).expect("register lo device failed");
 }
 
 /// ## Registration function of the lo (loopback) NIC device
-#[unified_init(INITCALL_DEVICE)]
+// TODO: for now do not register through the init macro, so that virtio-net comes first in the NIC list; once the network subsystem is refactored, use the init macro again and fix this bug
+// #[unified_init(INITCALL_DEVICE)]
 pub fn loopback_init() -> Result<(), SystemError> {
     loopback_probe();
+    log::debug!("Successfully init loopback device");
     return Ok(());
 }

+ 177 - 11
kernel/src/driver/net/mod.rs

@@ -1,3 +1,4 @@
+use alloc::{fmt, vec::Vec};
 use alloc::{string::String, sync::Arc};
 use smoltcp::{
     iface,
@@ -5,8 +6,12 @@ use smoltcp::{
 };
 use sysfs::netdev_register_kobject;
 
-use super::base::device::Device;
-use crate::libs::spinlock::SpinLock;
+use crate::{
+    libs::{rwlock::RwLock, spinlock::SpinLock},
+    net::socket::inet::{common::PortManager, InetSocket},
+    process::ProcessState,
+};
+use smoltcp;
 use system_error::SystemError;
 
 pub mod class;
@@ -52,23 +57,63 @@ pub enum Operstate {
 }
 
 #[allow(dead_code)]
-pub trait NetDevice: Device {
-    /// @brief 获取网卡的MAC地址
-    fn mac(&self) -> EthernetAddress;
+pub trait Iface: crate::driver::base::device::Device {
+    /// # `common`
+    /// 获取网卡的公共信息
+    fn common(&self) -> &IfaceCommon;
+
+    /// # `mac`
+    /// 获取网卡的MAC地址
+    fn mac(&self) -> smoltcp::wire::EthernetAddress;
 
+    /// # `iface_name`
+    /// Returns the name of the NIC
     fn iface_name(&self) -> String;
 
-    /// @brief 获取网卡的id
-    fn nic_id(&self) -> usize;
+    /// # `nic_id`
+    /// 获取网卡id
+    fn nic_id(&self) -> usize {
+        self.common().iface_id
+    }
 
-    fn poll(&self, sockets: &mut iface::SocketSet) -> Result<(), SystemError>;
+    /// # `poll`
+    /// Polls the state of the interface.
+    /// ## Parameters
+    /// - none besides `&self`; the former `sockets` argument is gone
+    /// ## Return value
+    /// - none; the method no longer returns a `Result`, so the former
+    ///   `Ok(())` / `Err(SystemError::EAGAIN_OR_EWOULDBLOCK)` outcomes do not apply
+    fn poll(&self);
 
-    fn update_ip_addrs(&self, ip_addrs: &[wire::IpCidr]) -> Result<(), SystemError>;
+    /// # `update_ip_addrs`
+    /// 用于更新接口的 IP 地址
+    /// ## 参数
+    /// - `ip_addrs` :一个包含 `smoltcp::wire::IpCidr` 的切片,表示要设置的 IP 地址和子网掩码
+    /// ## 返回值
+    /// - 如果 `ip_addrs` 的长度不为 1,返回 `Err(SystemError::EINVAL)`,表示输入参数无效
+    fn update_ip_addrs(&self, ip_addrs: &[smoltcp::wire::IpCidr]) -> Result<(), SystemError> {
+        self.common().update_ip_addrs(ip_addrs)
+    }
 
     /// @brief 获取smoltcp的网卡接口类型
-    fn inner_iface(&self) -> &SpinLock<smoltcp::iface::Interface>;
+    #[inline(always)]
+    fn smol_iface(&self) -> &SpinLock<smoltcp::iface::Interface> {
+        &self.common().smol_iface
+    }
     // fn as_any_ref(&'static self) -> &'static dyn core::any::Any;
 
+    /// # `sockets`
+    /// 获取网卡的套接字集
+    fn sockets(&self) -> &SpinLock<smoltcp::iface::SocketSet<'static>> {
+        &self.common().sockets
+    }
+
+    /// # `port_manager`
+    /// 用于管理网卡的端口
+    fn port_manager(&self) -> &PortManager {
+        &self.common().port_manager
+    }
+
     fn addr_assign_type(&self) -> u8;
 
     fn net_device_type(&self) -> u16;
@@ -108,7 +153,7 @@ impl Default for NetDeviceCommonData {
 
 /// 将网络设备注册到sysfs中
 /// 参考:https://code.dragonos.org.cn/xref/linux-2.6.39/net/core/dev.c?fi=register_netdev#5373
-fn register_netdevice(dev: Arc<dyn NetDevice>) -> Result<(), SystemError> {
+fn register_netdevice(dev: Arc<dyn Iface>) -> Result<(), SystemError> {
     // 在sysfs中注册设备
     netdev_register_kobject(dev.clone())?;
 
@@ -117,3 +162,124 @@ fn register_netdevice(dev: Arc<dyn NetDevice>) -> Result<(), SystemError> {
 
     return Ok(());
 }
+
+pub struct IfaceCommon { // State shared by the NIC implementations; reached through `Iface::common()`.
+    iface_id: usize, // Interface id; this is what the default `Iface::nic_id()` returns.
+    smol_iface: SpinLock<smoltcp::iface::Interface>, // The smoltcp interface, guarded by a spinlock.
+    /// The smoltcp socket set of this NIC
+    sockets: SpinLock<smoltcp::iface::SocketSet<'static>>,
+    /// Collection of the kernel's wrappers around smoltcp sockets
+    bounds: RwLock<Vec<Arc<dyn InetSocket>>>,
+    /// Port manager
+    port_manager: PortManager,
+    /// Time of the next poll, in milliseconds (`poll` stores 0 when smoltcp reports no next poll time)
+    poll_at_ms: core::sync::atomic::AtomicU64,
+}
+
+impl fmt::Debug for IfaceCommon { // Hand-written Debug: prints every field except `smol_iface`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("IfaceCommon")
+            .field("iface_id", &self.iface_id)
+            .field("sockets", &self.sockets)
+            .field("bounds", &self.bounds)
+            .field("port_manager", &self.port_manager)
+            .field("poll_at_ms", &self.poll_at_ms)
+            .finish()
+    }
+}
+
+impl IfaceCommon {
+    pub fn new(iface_id: usize, iface: smoltcp::iface::Interface) -> Self { // Builds the shared state: empty socket set, no bound sockets, poll_at_ms = 0.
+        IfaceCommon {
+            iface_id,
+            smol_iface: SpinLock::new(iface),
+            sockets: SpinLock::new(smoltcp::iface::SocketSet::new(Vec::new())),
+            bounds: RwLock::new(Vec::new()),
+            port_manager: PortManager::new(),
+            poll_at_ms: core::sync::atomic::AtomicU64::new(0),
+        }
+    }
+
+    pub fn poll<D>(&self, device: &mut D) // Polls smoltcp on `device`, records the next poll time in `poll_at_ms`, and notifies the bound sockets if any poll call reported events.
+    where
+        D: smoltcp::phy::Device + ?Sized,
+    {
+        let timestamp = crate::time::Instant::now().into();
+        let mut sockets = self.sockets.lock_irqsave();
+        let mut interface = self.smol_iface.lock_irqsave();
+
+        let (has_events, poll_at) = {
+            let mut has_events = false;
+            let mut poll_at;
+            loop { // repeat until smoltcp reports no next poll time, or one that is later than `timestamp`
+                has_events |= interface.poll(timestamp, device, &mut sockets);
+                poll_at = interface.poll_at(timestamp, &sockets);
+                let Some(instant) = poll_at else {
+                    break;
+                };
+                if instant > timestamp {
+                    break;
+                }
+            }
+            (has_events, poll_at)
+        };
+
+        // drop both guards (interface and sockets) here, before the bound sockets are notified below, to avoid deadlock
+        drop(interface);
+        drop(sockets);
+
+        use core::sync::atomic::Ordering;
+        if let Some(instant) = poll_at {
+            let _old_instant = self.poll_at_ms.load(Ordering::Relaxed); // currently unused; only the commented-out wake-up below refers to it
+            let new_instant = instant.total_millis() as u64;
+            self.poll_at_ms.store(new_instant, Ordering::Relaxed);
+
+            // if old_instant == 0 || new_instant < old_instant {
+            //     self.polling_wait_queue.wake_all();
+            // }
+        } else {
+            self.poll_at_ms.store(0, Ordering::Relaxed);
+        }
+
+        if has_events {
+            // log::debug!("IfaceCommon::poll: has_events");
+            // We never try to hold the write lock in the IRQ context, and we disable IRQ when
+            // holding the write lock. So we don't need to disable IRQ when holding the read lock.
+            self.bounds.read().iter().for_each(|bound_socket| {
+                bound_socket.on_iface_events();
+                bound_socket
+                    .wait_queue()
+                    .wakeup(Some(ProcessState::Blocked(true)));
+            });
+
+            // let closed_sockets = self
+            //     .closing_sockets
+            //     .lock_irq_disabled()
+            //     .extract_if(|closing_socket| closing_socket.is_closed())
+            //     .collect::<Vec<_>>();
+            // drop(closed_sockets);
+        }
+    }
+
+    pub fn update_ip_addrs(&self, ip_addrs: &[smoltcp::wire::IpCidr]) -> Result<(), SystemError> { // Accepts exactly one CIDR (otherwise EINVAL); overwrites the interface's first address, or pushes it when there is none.
+        if ip_addrs.len() != 1 {
+            return Err(SystemError::EINVAL);
+        }
+
+        self.smol_iface.lock().update_ip_addrs(|addrs| { // NOTE(review): plain lock() here, while poll() takes the same lock with lock_irqsave() — confirm this is intended
+            let dest = addrs.iter_mut().next();
+
+            if let Some(dest) = dest {
+                *dest = ip_addrs[0];
+            } else {
+                addrs.push(ip_addrs[0]).expect("Push ipCidr failed: full");
+            }
+        });
+        return Ok(());
+    }
+
+    // `bounds` has to store the concrete inet sockets so that events can be dispatched to the different kinds of inet socket
+    pub fn bind_socket(&self, socket: Arc<dyn InetSocket>) {
+        self.bounds.write().push(socket); // NOTE(review): write(), not write_irqsave(); the comment in poll() says IRQs are disabled while the write lock is held — confirm
+    }
+}

+ 12 - 12
kernel/src/driver/net/sysfs.rs

@@ -17,11 +17,11 @@ use intertrait::cast::CastArc;
 use log::error;
 use system_error::SystemError;
 
-use super::{class::sys_class_net_instance, NetDeivceState, NetDevice, Operstate};
+use super::{class::sys_class_net_instance, Iface, NetDeivceState, Operstate};
 
 /// 将设备注册到`/sys/class/net`目录下
 /// 参考:https://code.dragonos.org.cn/xref/linux-2.6.39/net/core/net-sysfs.c?fi=netdev_register_kobject#1311
-pub fn netdev_register_kobject(dev: Arc<dyn NetDevice>) -> Result<(), SystemError> {
+pub fn netdev_register_kobject(dev: Arc<dyn Iface>) -> Result<(), SystemError> {
     // 初始化设备
     device_manager().device_default_initialize(&(dev.clone() as Arc<dyn Device>));
 
@@ -103,8 +103,8 @@ impl Attribute for AttrAddrAssignType {
     }
 
     fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
-        let net_device = kobj.cast::<dyn NetDevice>().map_err(|_| {
-            error!("AttrAddrAssignType::show() failed: kobj is not a NetDevice");
+        let net_device = kobj.cast::<dyn Iface>().map_err(|_| {
+            error!("AttrAddrAssignType::show() failed: kobj is not a Iface");
             SystemError::EINVAL
         })?;
         let addr_assign_type = net_device.addr_assign_type();
@@ -271,8 +271,8 @@ impl Attribute for AttrType {
     }
 
     fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
-        let net_deive = kobj.cast::<dyn NetDevice>().map_err(|_| {
-            error!("AttrType::show() failed: kobj is not a NetDevice");
+        let net_deive = kobj.cast::<dyn Iface>().map_err(|_| {
+            error!("AttrType::show() failed: kobj is not a Iface");
             SystemError::EINVAL
         })?;
         let net_type = net_deive.net_device_type();
@@ -322,8 +322,8 @@ impl Attribute for AttrAddress {
     }
 
     fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
-        let net_device = kobj.cast::<dyn NetDevice>().map_err(|_| {
-            error!("AttrAddress::show() failed: kobj is not a NetDevice");
+        let net_device = kobj.cast::<dyn Iface>().map_err(|_| {
+            error!("AttrAddress::show() failed: kobj is not a Iface");
             SystemError::EINVAL
         })?;
         let mac_addr = net_device.mac();
@@ -373,8 +373,8 @@ impl Attribute for AttrCarrier {
     }
 
     fn show(&self, kobj: Arc<dyn KObject>, buf: &mut [u8]) -> Result<usize, SystemError> {
-        let net_device = kobj.cast::<dyn NetDevice>().map_err(|_| {
-            error!("AttrCarrier::show() failed: kobj is not a NetDevice");
+        let net_device = kobj.cast::<dyn Iface>().map_err(|_| {
+            error!("AttrCarrier::show() failed: kobj is not a Iface");
             SystemError::EINVAL
         })?;
         if net_device
@@ -489,8 +489,8 @@ impl Attribute for AttrOperstate {
     }
 
     fn show(&self, _kobj: Arc<dyn KObject>, _buf: &mut [u8]) -> Result<usize, SystemError> {
-        let net_device = _kobj.cast::<dyn NetDevice>().map_err(|_| {
-            error!("AttrOperstate::show() failed: kobj is not a NetDevice");
+        let net_device = _kobj.cast::<dyn Iface>().map_err(|_| {
+            error!("AttrOperstate::show() failed: kobj is not a Iface");
             SystemError::EINVAL
         })?;
         if !net_device

+ 17 - 60
kernel/src/driver/net/virtio_net.rs

@@ -16,7 +16,7 @@ use smoltcp::{iface, phy, wire};
 use unified_init::macros::unified_init;
 use virtio_drivers::device::net::VirtIONet;
 
-use super::{NetDeivceState, NetDevice, NetDeviceCommonData, Operstate};
+use super::{Iface, NetDeivceState, NetDeviceCommonData, Operstate};
 use crate::{
     arch::rand::rand,
     driver::{
@@ -47,7 +47,7 @@ use crate::{
         rwlock::{RwLockReadGuard, RwLockWriteGuard},
         spinlock::{SpinLock, SpinLockGuard},
     },
-    net::{generate_iface_id, net_core::poll_ifaces_try_lock_onetime, NET_DEVICES},
+    net::{generate_iface_id, net_core::poll_ifaces, NET_DEVICES},
     time::Instant,
 };
 use system_error::SystemError;
@@ -253,7 +253,8 @@ impl Device for VirtIONetDevice {
 
 impl VirtIODevice for VirtIONetDevice {
     fn handle_irq(&self, _irq: IrqNumber) -> Result<IrqReturn, SystemError> { // IRQ entry of the virtio net device: polls the interfaces and always reports the interrupt as handled.
-        poll_ifaces_try_lock_onetime().ok();
+        log::warn!("VirtioInterface: poll_ifaces_try_lock_onetime -> poll_ifaces"); // NOTE(review): logged at warn level on every call of this handler — confirm it is meant to stay
+        poll_ifaces();
         return Ok(IrqReturn::Handled);
     }
 
@@ -362,13 +363,13 @@ impl Debug for VirtIONicDeviceInner {
     }
 }
 
-#[cast_to([sync] NetDevice)]
+#[cast_to([sync] Iface)]
 #[cast_to([sync] Device)]
+#[derive(Debug)]
 pub struct VirtioInterface {
     device_inner: VirtIONicDeviceInnerWrapper,
-    iface_id: usize,
     iface_name: String,
-    iface: SpinLock<iface::Interface>,
+    iface_common: super::IfaceCommon,
     inner: SpinLock<InnerVirtIOInterface>,
     locked_kobj_state: LockedKObjectState,
 }
@@ -380,17 +381,6 @@ struct InnerVirtIOInterface {
     netdevice_common: NetDeviceCommonData,
 }
 
-impl core::fmt::Debug for VirtioInterface {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        f.debug_struct("VirtioInterface")
-            .field("iface_id", &self.iface_id)
-            .field("iface_name", &self.iface_name)
-            .field("inner", &self.inner)
-            .field("locked_kobj_state", &self.locked_kobj_state)
-            .finish()
-    }
-}
-
 impl VirtioInterface {
     pub fn new(mut device_inner: VirtIONicDeviceInner) -> Arc<Self> {
         let iface_id = generate_iface_id();
@@ -403,10 +393,9 @@ impl VirtioInterface {
 
         let result = Arc::new(VirtioInterface {
             device_inner: VirtIONicDeviceInnerWrapper(UnsafeCell::new(device_inner)),
-            iface_id,
             locked_kobj_state: LockedKObjectState::default(),
-            iface: SpinLock::new(iface),
             iface_name: format!("eth{}", iface_id),
+            iface_common: super::IfaceCommon::new(iface_id, iface),
             inner: SpinLock::new(InnerVirtIOInterface {
                 kobj_common: KObjectCommonData::default(),
                 device_common: DeviceCommonData::default(),
@@ -431,7 +420,7 @@ impl VirtioInterface {
 impl Drop for VirtioInterface {
     fn drop(&mut self) {
         // Remove this NIC's entry from the global table of network interfaces
-        NET_DEVICES.write_irqsave().remove(&self.iface_id);
+        NET_DEVICES.write_irqsave().remove(&self.nic_id());
     }
 }
 
@@ -624,57 +613,25 @@ pub fn virtio_net(
     }
 }
 
-impl NetDevice for VirtioInterface {
+impl Iface for VirtioInterface {
+    fn common(&self) -> &super::IfaceCommon {
+        &self.iface_common
+    }
+
     fn mac(&self) -> wire::EthernetAddress {
         let mac: [u8; 6] = self.device_inner.inner.lock().mac_address();
         return wire::EthernetAddress::from_bytes(&mac);
     }
 
-    #[inline]
-    fn nic_id(&self) -> usize {
-        return self.iface_id;
-    }
-
     #[inline]
     fn iface_name(&self) -> String {
         return self.iface_name.clone();
     }
 
-    fn update_ip_addrs(&self, ip_addrs: &[wire::IpCidr]) -> Result<(), SystemError> {
-        if ip_addrs.len() != 1 {
-            return Err(SystemError::EINVAL);
-        }
-
-        self.iface.lock().update_ip_addrs(|addrs| {
-            let dest = addrs.iter_mut().next();
-
-            if let Some(dest) = dest {
-                *dest = ip_addrs[0];
-            } else {
-                addrs
-                    .push(ip_addrs[0])
-                    .expect("Push wire::IpCidr failed: full");
-            }
-        });
-        return Ok(());
+    fn poll(&self) { // Delegates to the shared `iface_common.poll`, handing it the inner virtio device.
+        self.iface_common.poll(self.device_inner.force_get_mut())
     }
 
-    fn poll(&self, sockets: &mut iface::SocketSet) -> Result<(), SystemError> {
-        let timestamp: smoltcp::time::Instant = Instant::now().into();
-        let mut guard = self.iface.lock();
-        let poll_res = guard.poll(timestamp, self.device_inner.force_get_mut(), sockets);
-        // todo: notify!!!
-        // debug!("Virtio Interface poll:{poll_res}");
-        if poll_res {
-            return Ok(());
-        }
-        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
-    }
-
-    #[inline(always)]
-    fn inner_iface(&self) -> &SpinLock<iface::Interface> {
-        return &self.iface;
-    }
     // fn as_any_ref(&'static self) -> &'static dyn core::any::Any {
     //     return self;
     // }
@@ -839,7 +796,7 @@ impl VirtIODriver for VirtIONetDriver {
         // 设置iface的父设备为virtio_net_device
         iface.set_dev_parent(Some(Arc::downgrade(&virtio_net_device) as Weak<dyn Device>));
         // 在sysfs中注册iface
-        register_netdevice(iface.clone() as Arc<dyn NetDevice>)?;
+        register_netdevice(iface.clone() as Arc<dyn Iface>)?;
 
         // 将网卡的接口信息注册到全局的网卡接口信息表中
         NET_DEVICES

+ 10 - 9
kernel/src/filesystem/vfs/file.rs

@@ -23,7 +23,7 @@ use crate::{
     mm::{page::Page, MemoryManagementArch},
     net::{
         event_poll::{EPollItem, EPollPrivateData, EventPoll},
-        socket::SocketInode,
+        socket::Inode as SocketInode,
     },
     process::{cred::Cred, ProcessManager},
 };
@@ -570,9 +570,10 @@ impl File {
         match self.file_type {
             FileType::Socket => {
                 let inode = self.inode.downcast_ref::<SocketInode>().unwrap();
-                let mut socket = inode.inner();
+                // let mut socket = inode.inner();
 
-                return socket.add_epoll(epitem);
+                inode.epoll_items().add(epitem);
+                return Ok(());
             }
             FileType::Pipe => {
                 let inode = self.inode.downcast_ref::<LockedPipeInode>().unwrap();
@@ -592,12 +593,12 @@ impl File {
     /// ## 删除一个绑定的epoll
     pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
         match self.file_type {
-            FileType::Socket => {
-                let inode = self.inode.downcast_ref::<SocketInode>().unwrap();
-                let mut socket = inode.inner();
-
-                socket.remove_epoll(epoll)
-            }
+            FileType::Socket => self
+                .inode
+                .downcast_ref::<SocketInode>()
+                .unwrap()
+                .epoll_items()
+                .remove(epoll),
             FileType::Pipe => {
                 let inode = self.inode.downcast_ref::<LockedPipeInode>().unwrap();
                 inode.inner().lock().remove_epoll(epoll)

+ 2 - 2
kernel/src/filesystem/vfs/open.rs

@@ -1,5 +1,5 @@
 use alloc::sync::Arc;
-use log::warn;
+use log::{debug, warn};
 use system_error::SystemError;
 
 use super::{
@@ -81,7 +81,7 @@ fn do_sys_openat2(
     how: OpenHow,
     follow_symlink: bool,
 ) -> Result<usize, SystemError> {
-    // debug!("open path: {}, how: {:?}", path, how);
+    //debug!("open path: {}, how: {:?}", path, how);
     let path = path.trim();
 
     let (inode_begin, path) = user_path_at(&ProcessManager::current_pcb(), dirfd, path)?;

+ 1 - 1
kernel/src/filesystem/vfs/syscall.rs

@@ -2,7 +2,7 @@ use core::ffi::c_void;
 use core::mem::size_of;
 
 use alloc::{string::String, sync::Arc, vec::Vec};
-use log::warn;
+use log::{debug, warn};
 use system_error::SystemError;
 
 use crate::producefs;

+ 5 - 1
kernel/src/init/initial_kthread.rs

@@ -8,7 +8,10 @@ use system_error::SystemError;
 
 use crate::{
     arch::{interrupt::TrapFrame, process::arch_switch_to_user},
-    driver::{net::e1000e::e1000e::e1000e_init, virtio::virtio::virtio_probe},
+    driver::{
+        net::{e1000e::e1000e::e1000e_init, loopback::loopback_init},
+        virtio::virtio::virtio_probe,
+    },
     filesystem::vfs::core::mount_root_fs,
     net::net_core::net_init,
     process::{kthread::KernelThreadMechanism, stdio::stdio_init, ProcessFlags, ProcessManager},
@@ -40,6 +43,7 @@ fn kernel_init() -> Result<(), SystemError> {
     net_init().unwrap_or_else(|err| {
         error!("Failed to initialize network: {:?}", err);
     });
+    loopback_init()?;
 
     debug!("initial kernel thread done.");
 

+ 7 - 122
kernel/src/net/mod.rs

@@ -1,3 +1,7 @@
+//! # 网络模块
+//! 注意,net模块下,为了方便导入,模块细分,且共用部分模块直接使用
+//! `pub use`导出,导入时也常见`use crate::net::socket::*`的写法,
+//! 敬请注意。
 use core::{
     fmt::{self, Debug},
     sync::atomic::AtomicUsize,
@@ -5,20 +9,18 @@ use core::{
 
 use alloc::{collections::BTreeMap, sync::Arc};
 
-use crate::{driver::net::NetDevice, libs::rwlock::RwLock};
-use smoltcp::wire::IpEndpoint;
-
-use self::socket::SocketInode;
+use crate::{driver::net::Iface, libs::rwlock::RwLock};
 
 pub mod event_poll;
 pub mod net_core;
 pub mod socket;
 pub mod syscall;
+pub mod syscall_util;
 
 lazy_static! {
     /// # List of all network interfaces
     /// This list is also used in interrupt context, so irqsave is required
-    pub static ref NET_DEVICES: RwLock<BTreeMap<usize, Arc<dyn NetDevice>>> = RwLock::new(BTreeMap::new());
+    pub static ref NET_DEVICES: RwLock<BTreeMap<usize, Arc<dyn Iface>>> = RwLock::new(BTreeMap::new());
 }
 
 /// 生成网络接口的id (全局自增)
@@ -26,120 +28,3 @@ pub fn generate_iface_id() -> usize {
     static IFACE_ID: AtomicUsize = AtomicUsize::new(0);
     return IFACE_ID.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
 }
-
-bitflags! {
-    /// @brief 用于指定socket的关闭类型
-    /// 参考:https://code.dragonos.org.cn/xref/linux-6.1.9/include/net/sock.h?fi=SHUTDOWN_MASK#1573
-    pub struct ShutdownType: u8 {
-        const RCV_SHUTDOWN = 1;
-        const SEND_SHUTDOWN = 2;
-        const SHUTDOWN_MASK = 3;
-    }
-}
-
-#[derive(Debug, Clone)]
-pub enum Endpoint {
-    /// 链路层端点
-    LinkLayer(LinkLayerEndpoint),
-    /// 网络层端点
-    Ip(Option<IpEndpoint>),
-    /// inode端点
-    Inode(Option<Arc<SocketInode>>),
-    // todo: 增加NetLink机制后,增加NetLink端点
-}
-
-/// @brief 链路层端点
-#[derive(Debug, Clone)]
-pub struct LinkLayerEndpoint {
-    /// 网卡的接口号
-    pub interface: usize,
-}
-
-impl LinkLayerEndpoint {
-    /// @brief 创建一个链路层端点
-    ///
-    /// @param interface 网卡的接口号
-    ///
-    /// @return 返回创建的链路层端点
-    pub fn new(interface: usize) -> Self {
-        Self { interface }
-    }
-}
-
-/// IP datagram encapsulated protocol.
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-#[repr(u8)]
-pub enum Protocol {
-    HopByHop = 0x00,
-    Icmp = 0x01,
-    Igmp = 0x02,
-    Tcp = 0x06,
-    Udp = 0x11,
-    Ipv6Route = 0x2b,
-    Ipv6Frag = 0x2c,
-    Icmpv6 = 0x3a,
-    Ipv6NoNxt = 0x3b,
-    Ipv6Opts = 0x3c,
-    Unknown(u8),
-}
-
-impl fmt::Display for Protocol {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Protocol::HopByHop => write!(f, "Hop-by-Hop"),
-            Protocol::Icmp => write!(f, "ICMP"),
-            Protocol::Igmp => write!(f, "IGMP"),
-            Protocol::Tcp => write!(f, "TCP"),
-            Protocol::Udp => write!(f, "UDP"),
-            Protocol::Ipv6Route => write!(f, "IPv6-Route"),
-            Protocol::Ipv6Frag => write!(f, "IPv6-Frag"),
-            Protocol::Icmpv6 => write!(f, "ICMPv6"),
-            Protocol::Ipv6NoNxt => write!(f, "IPv6-NoNxt"),
-            Protocol::Ipv6Opts => write!(f, "IPv6-Opts"),
-            Protocol::Unknown(id) => write!(f, "0x{id:02x}"),
-        }
-    }
-}
-
-impl From<smoltcp::wire::IpProtocol> for Protocol {
-    fn from(value: smoltcp::wire::IpProtocol) -> Self {
-        let x: u8 = value.into();
-        Protocol::from(x)
-    }
-}
-
-impl From<u8> for Protocol {
-    fn from(value: u8) -> Self {
-        match value {
-            0x00 => Protocol::HopByHop,
-            0x01 => Protocol::Icmp,
-            0x02 => Protocol::Igmp,
-            0x06 => Protocol::Tcp,
-            0x11 => Protocol::Udp,
-            0x2b => Protocol::Ipv6Route,
-            0x2c => Protocol::Ipv6Frag,
-            0x3a => Protocol::Icmpv6,
-            0x3b => Protocol::Ipv6NoNxt,
-            0x3c => Protocol::Ipv6Opts,
-            _ => Protocol::Unknown(value),
-        }
-    }
-}
-
-impl From<Protocol> for u8 {
-    fn from(value: Protocol) -> Self {
-        match value {
-            Protocol::HopByHop => 0x00,
-            Protocol::Icmp => 0x01,
-            Protocol::Igmp => 0x02,
-            Protocol::Tcp => 0x06,
-            Protocol::Udp => 0x11,
-            Protocol::Ipv6Route => 0x2b,
-            Protocol::Ipv6Frag => 0x2c,
-            Protocol::Icmpv6 => 0x3a,
-            Protocol::Ipv6NoNxt => 0x3b,
-            Protocol::Ipv6Opts => 0x3c,
-            Protocol::Unknown(id) => id,
-        }
-    }
-}

+ 131 - 134
kernel/src/net/net_core.rs

@@ -4,17 +4,12 @@ use smoltcp::{socket::dhcpv4, wire};
 use system_error::SystemError;
 
 use crate::{
-    driver::net::{NetDevice, Operstate},
+    driver::net::{Iface, Operstate},
     libs::rwlock::RwLockReadGuard,
-    net::{socket::SocketPollMethod, NET_DEVICES},
+    net::NET_DEVICES,
     time::timer::{next_n_ms_timer_jiffies, Timer, TimerFunction},
 };
 
-use super::{
-    event_poll::{EPollEventType, EventPoll},
-    socket::{handle::GlobalSocketHandle, inet::TcpSocket, HANDLE_MAP, SOCKET_SET},
-};
-
 /// The network poll function, which will be called by timer.
 ///
 /// The main purpose of this function is to poll all network interfaces.
@@ -24,7 +19,7 @@ struct NetWorkPollFunc;
 
 impl TimerFunction for NetWorkPollFunc {
     fn run(&mut self) -> Result<(), SystemError> {
-        poll_ifaces_try_lock(10).ok();
+        poll_ifaces();
         let next_time = next_n_ms_timer_jiffies(10);
         let timer = Timer::new(Box::new(NetWorkPollFunc), next_time);
         timer.activate();
@@ -43,10 +38,10 @@ pub fn net_init() -> Result<(), SystemError> {
 
 fn dhcp_query() -> Result<(), SystemError> {
     let binding = NET_DEVICES.write_irqsave();
-
+    log::debug!("binding: {:?}", *binding);
     //由于现在os未实现在用户态为网卡动态分配内存,而lo网卡的id最先分配且ip固定不能被分配
-    //所以特判取用id为1的网卡(也就是virto_net)
-    let net_face = binding.get(&1).ok_or(SystemError::ENODEV)?.clone();
+    //所以特判取用id为0的网卡(也就是virto_net)
+    let net_face = binding.get(&0).ok_or(SystemError::ENODEV)?.clone();
 
     drop(binding);
 
@@ -59,13 +54,16 @@ fn dhcp_query() -> Result<(), SystemError> {
     // IMPORTANT: This should be removed in production.
     dhcp_socket.set_max_lease_duration(Some(smoltcp::time::Duration::from_secs(10)));
 
-    let dhcp_handle = SOCKET_SET.lock_irqsave().add(dhcp_socket);
+    let sockets = || net_face.sockets().lock_irqsave();
+
+    // let dhcp_handle = SOCKET_SET.lock_irqsave().add(dhcp_socket);
+    let dhcp_handle = sockets().add(dhcp_socket);
 
     const DHCP_TRY_ROUND: u8 = 10;
     for i in 0..DHCP_TRY_ROUND {
-        debug!("DHCP try round: {}", i);
-        net_face.poll(&mut SOCKET_SET.lock_irqsave()).ok();
-        let mut binding = SOCKET_SET.lock_irqsave();
+        log::debug!("DHCP try round: {}", i);
+        net_face.poll();
+        let mut binding = sockets();
         let event = binding.get_mut::<dhcpv4::Socket>(dhcp_handle).poll();
 
         match event {
@@ -81,13 +79,26 @@ fn dhcp_query() -> Result<(), SystemError> {
                     .ok();
 
                 if let Some(router) = config.router {
-                    net_face
-                        .inner_iface()
-                        .lock()
+                    let mut smol_iface = net_face.smol_iface().lock();
+                    smol_iface.routes_mut().update(|table| {
+                        let _ = table.push(smoltcp::iface::Route {
+                            cidr: smoltcp::wire::IpCidr::Ipv4(smoltcp::wire::Ipv4Cidr::new(
+                                smoltcp::wire::Ipv4Address::new(127, 0, 0, 0),
+                                8,
+                            )),
+                            via_router: smoltcp::wire::IpAddress::v4(127, 0, 0, 1),
+                            preferred_until: None,
+                            expires_at: None,
+                        });
+                    });
+                    if smol_iface
                         .routes_mut()
                         .add_default_ipv4_route(router)
-                        .unwrap();
-                    let cidr = net_face.inner_iface().lock().ip_addrs().first().cloned();
+                        .is_err()
+                    {
+                        log::warn!("Route table full");
+                    }
+                    let cidr = smol_iface.ip_addrs().first().cloned();
                     if let Some(cidr) = cidr {
                         // 这里先在这里将网卡设置为up,后面等netlink实现了再修改
                         net_face.set_operstate(Operstate::IF_OPER_UP);
@@ -96,7 +107,7 @@ fn dhcp_query() -> Result<(), SystemError> {
                     }
                 } else {
                     net_face
-                        .inner_iface()
+                        .smol_iface()
                         .lock()
                         .routes_mut()
                         .remove_default_ipv4_route();
@@ -112,7 +123,7 @@ fn dhcp_query() -> Result<(), SystemError> {
                     ))])
                     .ok();
                 net_face
-                    .inner_iface()
+                    .smol_iface()
                     .lock()
                     .routes_mut()
                     .remove_default_ipv4_route();
@@ -124,123 +135,109 @@ fn dhcp_query() -> Result<(), SystemError> {
 }
 
 pub fn poll_ifaces() {
-    let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn NetDevice>>> = NET_DEVICES.read_irqsave();
+    // log::debug!("poll_ifaces");
+    let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn Iface>>> = NET_DEVICES.read_irqsave();
     if guard.len() == 0 {
         warn!("poll_ifaces: No net driver found!");
         return;
     }
-    let mut sockets = SOCKET_SET.lock_irqsave();
     for (_, iface) in guard.iter() {
-        iface.poll(&mut sockets).ok();
-    }
-    let _ = send_event(&sockets);
-}
-
-/// 对ifaces进行轮询,最多对SOCKET_SET尝试times次加锁。
-///
-/// @return 轮询成功,返回Ok(())
-/// @return 加锁超时,返回SystemError::EAGAIN_OR_EWOULDBLOCK
-/// @return 没有网卡,返回SystemError::ENODEV
-pub fn poll_ifaces_try_lock(times: u16) -> Result<(), SystemError> {
-    let mut i = 0;
-    while i < times {
-        let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn NetDevice>>> =
-            NET_DEVICES.read_irqsave();
-        if guard.len() == 0 {
-            warn!("poll_ifaces: No net driver found!");
-            // 没有网卡,返回错误
-            return Err(SystemError::ENODEV);
-        }
-        let sockets = SOCKET_SET.try_lock_irqsave();
-        // 加锁失败,继续尝试
-        if sockets.is_err() {
-            i += 1;
-            continue;
-        }
-
-        let mut sockets = sockets.unwrap();
-        for (_, iface) in guard.iter() {
-            iface.poll(&mut sockets).ok();
-        }
-        send_event(&sockets)?;
-        return Ok(());
+        iface.poll();
     }
-    // 尝试次数用完,返回错误
-    return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
 }
 
-/// 对ifaces进行轮询,最多对SOCKET_SET尝试一次加锁。
-///
-/// @return 轮询成功,返回Ok(())
-/// @return 加锁超时,返回SystemError::EAGAIN_OR_EWOULDBLOCK
-/// @return 没有网卡,返回SystemError::ENODEV
-pub fn poll_ifaces_try_lock_onetime() -> Result<(), SystemError> {
-    let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn NetDevice>>> = NET_DEVICES.read_irqsave();
-    if guard.len() == 0 {
-        warn!("poll_ifaces: No net driver found!");
-        // 没有网卡,返回错误
-        return Err(SystemError::ENODEV);
-    }
-    let mut sockets = SOCKET_SET.try_lock_irqsave()?;
-    for (_, iface) in guard.iter() {
-        iface.poll(&mut sockets).ok();
-    }
-    send_event(&sockets)?;
-    return Ok(());
-}
-
-/// ### 处理轮询后的事件
-fn send_event(sockets: &smoltcp::iface::SocketSet) -> Result<(), SystemError> {
-    for (handle, socket_type) in sockets.iter() {
-        let handle_guard = HANDLE_MAP.read_irqsave();
-        let global_handle = GlobalSocketHandle::new_smoltcp_handle(handle);
-        let item: Option<&super::socket::SocketHandleItem> = handle_guard.get(&global_handle);
-        if item.is_none() {
-            continue;
-        }
-
-        let handle_item = item.unwrap();
-        let posix_item = handle_item.posix_item();
-        if posix_item.is_none() {
-            continue;
-        }
-        let posix_item = posix_item.unwrap();
-
-        // 获取socket上的事件
-        let mut events = SocketPollMethod::poll(socket_type, handle_item).bits() as u64;
-
-        // 分发到相应类型socket处理
-        match socket_type {
-            smoltcp::socket::Socket::Raw(_) | smoltcp::socket::Socket::Udp(_) => {
-                posix_item.wakeup_any(events);
-            }
-            smoltcp::socket::Socket::Icmp(_) => unimplemented!("Icmp socket hasn't unimplemented"),
-            smoltcp::socket::Socket::Tcp(inner_socket) => {
-                if inner_socket.is_active() {
-                    events |= TcpSocket::CAN_ACCPET;
-                }
-                if inner_socket.state() == smoltcp::socket::tcp::State::Established {
-                    events |= TcpSocket::CAN_CONNECT;
-                }
-                if inner_socket.state() == smoltcp::socket::tcp::State::CloseWait {
-                    events |= EPollEventType::EPOLLHUP.bits() as u64;
-                }
-
-                posix_item.wakeup_any(events);
-            }
-            smoltcp::socket::Socket::Dhcpv4(_) => {}
-            smoltcp::socket::Socket::Dns(_) => unimplemented!("Dns socket hasn't unimplemented"),
-        }
-        EventPoll::wakeup_epoll(
-            &posix_item.epitems,
-            EPollEventType::from_bits_truncate(events as u32),
-        )?;
-        drop(handle_guard);
-        // crate::debug!(
-        //     "{} send_event {:?}",
-        //     handle,
-        //     EPollEventType::from_bits_truncate(events as u32)
-        // );
-    }
-    Ok(())
-}
+// /// 对ifaces进行轮询,最多对SOCKET_SET尝试times次加锁。
+// ///
+// /// @return 轮询成功,返回Ok(())
+// /// @return 加锁超时,返回SystemError::EAGAIN_OR_EWOULDBLOCK
+// /// @return 没有网卡,返回SystemError::ENODEV
+// pub fn poll_ifaces_try_lock(times: u16) -> Result<(), SystemError> {
+//     let mut i = 0;
+//     while i < times {
+//         let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn Iface>>> =
+//             NET_DEVICES.read_irqsave();
+//         if guard.len() == 0 {
+//             warn!("poll_ifaces: No net driver found!");
+//             // 没有网卡,返回错误
+//             return Err(SystemError::ENODEV);
+//         }
+//         for (_, iface) in guard.iter() {
+//             iface.poll();
+//         }
+//         return Ok(());
+//     }
+//     // 尝试次数用完,返回错误
+//     return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+// }
+
+// /// 对ifaces进行轮询,最多对SOCKET_SET尝试一次加锁。
+// ///
+// /// @return 轮询成功,返回Ok(())
+// /// @return 加锁超时,返回SystemError::EAGAIN_OR_EWOULDBLOCK
+// /// @return 没有网卡,返回SystemError::ENODEV
+// pub fn poll_ifaces_try_lock_onetime() -> Result<(), SystemError> {
+//     let guard: RwLockReadGuard<BTreeMap<usize, Arc<dyn Iface>>> = NET_DEVICES.read_irqsave();
+//     if guard.len() == 0 {
+//         warn!("poll_ifaces: No net driver found!");
+//         // 没有网卡,返回错误
+//         return Err(SystemError::ENODEV);
+//     }
+//     for (_, iface) in guard.iter() {
+//         let _ = iface.poll();
+//     }
+//     send_event()?;
+//     return Ok(());
+// }
+
+// /// ### 处理轮询后的事件
+// fn send_event() -> Result<(), SystemError> {
+//     for (handle, socket_type) in .lock().iter() {
+
+//         let global_handle = GlobalSocketHandle::new_smoltcp_handle(handle);
+
+//         let handle_guard = HANDLE_MAP.read_irqsave();
+//         let item: Option<&super::socket::SocketHandleItem> = handle_guard.get(&global_handle);
+//         if item.is_none() {
+//             continue;
+//         }
+
+//         let handle_item = item.unwrap();
+//         let posix_item = handle_item.posix_item();
+//         if posix_item.is_none() {
+//             continue;
+//         }
+//         let posix_item = posix_item.unwrap();
+
+//         // 获取socket上的事件
+//         let mut events = SocketPollMethod::poll(socket_type, handle_item).bits() as u64;
+
+//         // 分发到相应类型socket处理
+//         match socket_type {
+//             smoltcp::socket::Socket::Raw(_) | smoltcp::socket::Socket::Udp(_) => {
+//                 posix_item.wakeup_any(events);
+//             }
+//             smoltcp::socket::Socket::Icmp(_) => unimplemented!("Icmp socket hasn't unimplemented"),
+//             smoltcp::socket::Socket::Tcp(inner_socket) => {
+//                 if inner_socket.is_active() {
+//                     events |= TcpSocket::CAN_ACCPET;
+//                 }
+//                 if inner_socket.state() == smoltcp::socket::tcp::State::Established {
+//                     events |= TcpSocket::CAN_CONNECT;
+//                 }
+//                 if inner_socket.state() == smoltcp::socket::tcp::State::CloseWait {
+//                     events |= EPollEventType::EPOLLHUP.bits() as u64;
+//                 }
+
+//                 posix_item.wakeup_any(events);
+//             }
+//             smoltcp::socket::Socket::Dhcpv4(_) => {}
+//             smoltcp::socket::Socket::Dns(_) => unimplemented!("Dns socket hasn't unimplemented"),
+//         }
+//         EventPoll::wakeup_epoll(
+//             &posix_item.epitems,
+//             EPollEventType::from_bits_truncate(events as u32),
+//         )?;
+//         drop(handle_guard);
+//     }
+//     Ok(())
+// }

+ 143 - 0
kernel/src/net/socket/base.rs

@@ -0,0 +1,143 @@
+#![allow(unused_variables)]
+
+use crate::net::socket::*;
+use crate::net::syscall_util::MsgHdr;
+use alloc::sync::Arc;
+use core::any::Any;
+use core::fmt::Debug;
+use system_error::SystemError::{self, *};
+
+/// # `Socket` methods
+/// Common interface of the kernel socket types. Only `wait_queue`, `poll`,
+/// `send_buffer_size` and `recv_buffer_size` are required; every other
+/// operation has a default body (most of them return `ENOSYS`) that a
+/// concrete socket overrides when it supports the operation.
+/// ## Reference
+/// - [Posix standard](https://pubs.opengroup.org/onlinepubs/9699919799/)
+pub trait Socket: Sync + Send + Debug + Any {
+    /// # `wait_queue`
+    /// Returns the wait queue of this socket.
+    fn wait_queue(&self) -> &WaitQueue;
+    /// # `socket_poll`
+    /// Returns the events currently pending on this socket.
+    ///
+    /// NOTE(review): the `usize` is presumably an epoll event bit mask --
+    /// confirm against the implementors.
+    fn poll(&self) -> usize;
+
+    /// Returns the size of the send buffer.
+    fn send_buffer_size(&self) -> usize;
+    /// Returns the size of the receive buffer.
+    fn recv_buffer_size(&self) -> usize;
+    /// # `accept`
+    /// Accepts a connection; only meaningful for a listening stream socket.
+    /// ## Block
+    /// Blocks when no connection has arrived yet.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `bind`
+    /// Counterpart of POSIX `bind`: binds the socket to the given local endpoint.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn bind(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `close`
+    /// Closes the socket.
+    /// ## Default
+    /// Does nothing and returns `Ok(())`.
+    fn close(&self) -> Result<(), SystemError> {
+        Ok(())
+    }
+    /// # `connect`
+    /// Counterpart of POSIX `connect`: connects to the given remote endpoint.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn connect(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        Err(ENOSYS)
+    }
+    // fcntl
+    // freeaddrinfo
+    // getaddrinfo
+    // getnameinfo
+    /// # `get_peer_name`
+    /// Returns the address of the peer.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn get_peer_name(&self) -> Result<Endpoint, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `get_name`
+    /// Returns the address of this socket.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn get_name(&self) -> Result<Endpoint, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `get_option`
+    /// Counterpart of POSIX `getsockopt`: reads a socket option.
+    /// ## Default
+    /// Logs a warning, leaves `value` untouched and returns `Ok(0)`, so
+    /// callers see success even though nothing was read.
+    fn get_option(
+        &self,
+        level: OptionsLevel,
+        name: usize,
+        value: &mut [u8],
+    ) -> Result<usize, SystemError> {
+        log::warn!("getsockopt is not implemented");
+        Ok(0)
+    }
+    /// # `listen`
+    /// Starts listening; only meaningful for a stream socket.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        Err(ENOSYS)
+    }
+    // poll
+    // pselect
+    /// # `read`
+    /// Forwards to [`Socket::recv`] with an empty flag set.
+    fn read(&self, buffer: &mut [u8]) -> Result<usize, SystemError> {
+        self.recv(buffer, MessageFlag::empty())
+    }
+    /// # `recv`
+    /// Receives data; `read` = `recv` with flags = 0.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn recv(&self, buffer: &mut [u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `recv_from`
+    /// Receives data and reports an endpoint together with the byte count.
+    ///
+    /// NOTE(review): the role of the optional `address` argument is not
+    /// visible here -- confirm against the implementors.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn recv_from(
+        &self,
+        buffer: &mut [u8],
+        flags: MessageFlag,
+        address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `recv_msg`
+    /// Message-header based receive.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn recv_msg(&self, msg: &mut MsgHdr, flags: MessageFlag) -> Result<usize, SystemError> {
+        Err(ENOSYS)
+    }
+    // select
+    /// # `send`
+    /// Sends data; `write` = `send` with flags = 0.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn send(&self, buffer: &[u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `send_msg`
+    /// Message-header based send.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn send_msg(&self, msg: &MsgHdr, flags: MessageFlag) -> Result<usize, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `send_to`
+    /// Sends data to the given endpoint.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn send_to(
+        &self,
+        buffer: &[u8],
+        flags: MessageFlag,
+        address: Endpoint,
+    ) -> Result<usize, SystemError> {
+        Err(ENOSYS)
+    }
+    /// # `set_option`
+    /// Counterpart of POSIX `setsockopt`: sets a socket option.
+    /// ## Parameters
+    /// - level: the level the option belongs to
+    /// - name: the name of the option
+    /// - val: the value of the option
+    /// ## Default
+    /// Logs a warning, changes nothing and returns `Ok(())`.
+    /// ## Reference
+    /// https://code.dragonos.org.cn/s?refs=sk_setsockopt&project=linux-6.6.21
+    fn set_option(&self, level: OptionsLevel, name: usize, val: &[u8]) -> Result<(), SystemError> {
+        log::warn!("setsockopt is not implemented");
+        Ok(())
+    }
+    /// # `shutdown`
+    /// Shuts down the directions described by `how`.
+    /// ## Default
+    /// Returns `ENOSYS`.
+    fn shutdown(&self, how: ShutdownTemp) -> Result<(), SystemError> {
+        Err(ENOSYS)
+    }
+    // sockatmark
+    // socket
+    // socketpair
+    /// # `write`
+    /// Forwards to [`Socket::send`] with an empty flag set.
+    fn write(&self, buffer: &[u8]) -> Result<usize, SystemError> {
+        self.send(buffer, MessageFlag::empty())
+    }
+    // fn write_buffer(&self, _buf: &[u8]) -> Result<usize, SystemError> {
+    //     todo!()
+    // }
+}

+ 91 - 0
kernel/src/net/socket/buffer.rs

@@ -0,0 +1,91 @@
+use alloc::vec::Vec;
+
+use alloc::{string::String, sync::Arc};
+use log::debug;
+use system_error::SystemError;
+
+use crate::libs::spinlock::SpinLock;
+
+/// A pair of independently locked byte queues with a shared size limit.
+#[derive(Debug)]
+pub struct Buffer {
+    /// Size limits applied to the queues below.
+    metadata: Metadata,
+    /// Bytes appended by `write_read_buffer` and consumed by `read_read_buffer`.
+    read_buffer: SpinLock<Vec<u8>>,
+    /// Bytes appended by `write_write_buffer`.
+    write_buffer: SpinLock<Vec<u8>>,
+}
+
+impl Buffer {
+    /// Creates an empty buffer pair whose size limits come from
+    /// `Metadata::default()`.
+    pub fn new() -> Arc<Self> {
+        Arc::new(Self {
+            metadata: Metadata::default(),
+            read_buffer: SpinLock::new(Vec::new()),
+            write_buffer: SpinLock::new(Vec::new()),
+        })
+    }
+
+    /// Returns `true` when the read queue holds no bytes.
+    pub fn is_read_buf_empty(&self) -> bool {
+        self.read_buffer.lock().is_empty()
+    }
+
+    /// Returns `true` when the read queue has reached the configured limit.
+    pub fn is_read_buf_full(&self) -> bool {
+        // `>=` instead of `limit - len == 0`: same answer while the invariant
+        // `len <= limit` holds, and no unsigned underflow if it ever breaks.
+        self.read_buffer.lock().len() >= self.metadata.buf_size
+    }
+
+    /// Returns `true` when the write queue holds no bytes.
+    pub fn is_write_buf_empty(&self) -> bool {
+        self.write_buffer.lock().is_empty()
+    }
+
+    /// Returns `true` when the write queue has reached the configured limit.
+    pub fn is_write_buf_full(&self) -> bool {
+        self.write_buffer.lock().len() >= self.metadata.buf_size
+    }
+
+    /// Moves up to `buf.len()` bytes from the front of the read queue into
+    /// `buf` and returns how many bytes were copied.
+    ///
+    /// Bytes that did not fit into `buf` stay queued for the next call.
+    pub fn read_read_buffer(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        let mut read_buffer = self.read_buffer.lock_irqsave();
+        let len = core::cmp::min(buf.len(), read_buffer.len());
+        buf[..len].copy_from_slice(&read_buffer[..len]);
+        // Remove exactly the bytes that were handed out. The previous
+        // `split_off(len)` did the opposite: it kept the delivered prefix and
+        // threw away the unread tail.
+        read_buffer.drain(..len);
+        // Only the first `len` bytes of `buf` were written by this call.
+        log::debug!("recv buf {}", String::from_utf8_lossy(&buf[..len]));
+
+        Ok(len)
+    }
+
+    /// Appends `buf` to the read queue.
+    ///
+    /// Returns `ENOBUFS` without queuing anything when `buf` does not fit into
+    /// the remaining space; otherwise returns `buf.len()`.
+    pub fn write_read_buffer(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        let mut buffer = self.read_buffer.lock_irqsave();
+        log::debug!("send buf {}", String::from_utf8_lossy(buf));
+        Self::append_bounded(&mut buffer, self.metadata.buf_size, buf)
+    }
+
+    /// Appends `buf` to the write queue.
+    ///
+    /// Returns `ENOBUFS` without queuing anything when `buf` does not fit into
+    /// the remaining space; otherwise returns `buf.len()`.
+    pub fn write_write_buffer(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        let mut buffer = self.write_buffer.lock_irqsave();
+        Self::append_bounded(&mut buffer, self.metadata.buf_size, buf)
+    }
+
+    /// Appends `data` to `queue` unless that would push it past `limit` bytes.
+    fn append_bounded(
+        queue: &mut Vec<u8>,
+        limit: usize,
+        data: &[u8],
+    ) -> Result<usize, SystemError> {
+        // Saturating: a queue that is somehow over the limit has zero space
+        // left rather than a wrapped-around huge amount.
+        if limit.saturating_sub(queue.len()) < data.len() {
+            return Err(SystemError::ENOBUFS);
+        }
+        queue.extend_from_slice(data);
+
+        Ok(data.len())
+    }
+}
+
+/// Size limits used by [`Buffer`].
+#[derive(Debug)]
+pub struct Metadata {
+    /// Default size of the metadata buffer.
+    metadata_buf_size: usize,
+    /// Default size of the data buffer.
+    buf_size: usize,
+}
+
+impl Default for Metadata {
+    /// Defaults: 1024 for the metadata buffer, 64 * 1024 for the data buffer.
+    fn default() -> Self {
+        const KIB: usize = 1024;
+
+        Self {
+            buf_size: 64 * KIB,
+            metadata_buf_size: KIB,
+        }
+    }
+}

+ 64 - 0
kernel/src/net/socket/common/epoll_items.rs

@@ -0,0 +1,64 @@
+use alloc::{
+    collections::LinkedList,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use system_error::SystemError;
+
+use crate::{
+    libs::{spinlock::SpinLock, wait_queue::EventWaitQueue},
+    net::event_poll::{EPollEventType, EPollItem, EventPoll},
+    process::ProcessManager,
+    sched::{schedule, SchedMode},
+};
+
+/// The epoll registrations attached to a socket.
+///
+/// The list lives behind an `Arc`, so cloning this struct yields another
+/// handle to the same list rather than a copy of it.
+#[derive(Debug, Clone)]
+pub struct EPollItems {
+    /// Registered epoll items, guarded by a spin lock.
+    items: Arc<SpinLock<LinkedList<Arc<EPollItem>>>>,
+}
+
+impl Default for EPollItems {
+    /// Starts with an empty registration list.
+    fn default() -> Self {
+        let empty = LinkedList::new();
+        let items = Arc::new(SpinLock::new(empty));
+        Self { items }
+    }
+}
+
+impl EPollItems {
+    /// Appends `item` to the end of the registration list.
+    pub fn add(&self, item: Arc<EPollItem>) {
+        let mut list = self.items.lock_irqsave();
+        list.push_back(item);
+    }
+
+    /// Detaches every registration whose epoll instance is `item`.
+    ///
+    /// Returns `ENOENT` when no registration matched.
+    pub fn remove(&self, item: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
+        // The lock guard is a temporary of this statement, so it is released
+        // before the detached entries are dropped at the end of the function.
+        let detached: Vec<_> = self
+            .items
+            .lock_irqsave()
+            .extract_if(|entry| entry.epoll().ptr_eq(item))
+            .collect();
+
+        if detached.is_empty() {
+            Err(SystemError::ENOENT)
+        } else {
+            Ok(())
+        }
+    }
+
+    /// Calls `EventPoll::ep_remove` for every registration whose epoll
+    /// instance is still alive, then empties the list.
+    ///
+    /// All registrations are processed even if some calls fail; the error of
+    /// the last failing call (if any) is returned.
+    pub fn clear(&self) -> Result<(), SystemError> {
+        let mut list = self.items.lock_irqsave();
+        let mut outcome = Ok(());
+        for entry in list.iter() {
+            // Registrations whose epoll instance is already gone are skipped.
+            if let Some(epoll) = entry.epoll().upgrade() {
+                if let Err(err) =
+                    EventPoll::ep_remove(&mut epoll.lock_irqsave(), entry.fd(), None)
+                {
+                    outcome = Err(err);
+                }
+            }
+        }
+        list.clear();
+        outcome
+    }
+}

+ 20 - 0
kernel/src/net/socket/common/mod.rs

@@ -0,0 +1,20 @@
+// pub mod poll_unit;
+mod epoll_items;
+pub mod shutdown;
+
+pub use epoll_items::EPollItems;
+#[allow(dead_code)]
+pub use shutdown::Shutdown;
+
+// /// @brief 在trait Socket的metadata函数中返回该结构体供外部使用
+// #[derive(Debug, Clone)]
+// pub struct Metadata {
+//     /// 接收缓冲区的大小
+//     pub rx_buf_size: usize,
+//     /// 发送缓冲区的大小
+//     pub tx_buf_size: usize,
+//     /// 元数据的缓冲区的大小
+//     pub metadata_buf_size: usize,
+//     /// socket的选项
+//     pub options: SocketOptions,
+// }

+ 72 - 0
kernel/src/net/socket/common/poll_unit.rs

@@ -0,0 +1,72 @@
+use alloc::{
+    collections::LinkedList,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use system_error::SystemError;
+
+use crate::{
+    libs::{spinlock::SpinLock, wait_queue::EventWaitQueue},
+    net::event_poll::{EPollEventType, EPollItem, EventPoll},
+    process::ProcessManager,
+    sched::{schedule, SchedMode},
+};
+
+/// Shared handle to the wait queue of a socket.
+///
+/// The queue lives behind an `Arc`, so clones refer to the same queue.
+#[derive(Debug, Clone)]
+pub struct WaitQueue {
+    /// The wait queue of the socket.
+    wait_queue: Arc<EventWaitQueue>,
+}
+
+impl Default for WaitQueue {
+    /// Wraps a default-constructed `EventWaitQueue`.
+    fn default() -> Self {
+        let wait_queue = Arc::new(EventWaitQueue::default());
+        Self { wait_queue }
+    }
+}
+
+impl WaitQueue {
+    /// Wraps an existing `EventWaitQueue` in a new shared handle.
+    pub fn new(wait_queue: EventWaitQueue) -> Self {
+        Self {
+            wait_queue: Arc::new(wait_queue),
+        }
+    }
+
+    /// # `wakeup_any`
+    /// Wakes the processes on this queue that are waiting for `events`.
+    /// ## Parameters
+    /// - events: the events that occurred
+    ///
+    /// Note that a process is woken as soon as any one of the events in
+    /// `events` is triggered.
+    pub fn wakeup_any(&self, events: EPollEventType) {
+        self.wait_queue.wakeup_any(events.bits() as u64);
+    }
+
+    /// # `wait_for`
+    /// Waits for `events` to occur.
+    ///
+    /// The wait is registered with preemption disabled; the scheduler is
+    /// invoked only after preemption has been re-enabled.
+    pub fn wait_for(&self, events: EPollEventType) {
+        // NOTE(review): the invariant that makes this `unsafe` block sound is
+        // not visible here -- presumably `sleep_without_schedule` requires
+        // preemption to be disabled by the caller; confirm and record it as a
+        // SAFETY comment.
+        unsafe {
+            ProcessManager::preempt_disable();
+            self.wait_queue.sleep_without_schedule(events.bits() as u64);
+            ProcessManager::preempt_enable();
+        }
+        schedule(SchedMode::SM_NONE);
+    }
+
+    /// # `busy_wait`
+    /// Repeatedly calls a function that may return `EAGAIN_OR_EWOULDBLOCK`.
+    ///
+    /// Each time `f` fails with `EAGAIN_OR_EWOULDBLOCK` this calls
+    /// [`WaitQueue::wait_for`] with `events` and then tries again. The first
+    /// `Ok` value or any other error is returned to the caller.
+    pub fn busy_wait<F, R>(&self, events: EPollEventType, mut f: F) -> Result<R, SystemError>
+    where
+        F: FnMut() -> Result<R, SystemError>,
+    {
+        loop {
+            match f() {
+                Ok(r) => return Ok(r),
+                Err(SystemError::EAGAIN_OR_EWOULDBLOCK) => {
+                    self.wait_for(events);
+                }
+                Err(e) => return Err(e),
+            }
+        }
+    }
+}

+ 118 - 0
kernel/src/net/socket/common/shutdown.rs

@@ -0,0 +1,118 @@
+use core::sync::atomic::AtomicU8;
+
+bitflags! {
+    /// Specifies how a socket is shut down.
+    ///
+    /// Note that the constants are the plain values 0, 1 and 2 rather than
+    /// independent bit masks: `SHUT_RD` is the all-zero value and therefore
+    /// equals `ShutdownBit::empty()`.
+    ///
+    /// Reference: https://code.dragonos.org.cn/xref/linux-6.1.9/include/net/sock.h?fi=SHUTDOWN_MASK#1573
+    pub struct ShutdownBit: u8 {
+        const SHUT_RD = 0;
+        const SHUT_WR = 1;
+        const SHUT_RDWR = 2;
+    }
+}
+
+const RCV_SHUTDOWN: u8 = 0x01;
+const SEND_SHUTDOWN: u8 = 0x02;
+const SHUTDOWN_MASK: u8 = 0x03;
+
+/// Atomically updated shutdown state of a socket.
+#[derive(Debug, Default)]
+pub struct Shutdown {
+    /// Combination of `RCV_SHUTDOWN` and `SEND_SHUTDOWN`; 0 means neither
+    /// direction has been shut down.
+    bit: AtomicU8,
+}
+
+impl From<ShutdownBit> for Shutdown {
+    /// Builds the state requested by `shutdown_bit`; any value other than
+    /// `SHUT_RD`, `SHUT_WR` or `SHUT_RDWR` yields the empty state.
+    fn from(shutdown_bit: ShutdownBit) -> Self {
+        let initial = match shutdown_bit {
+            ShutdownBit::SHUT_RD => RCV_SHUTDOWN,
+            ShutdownBit::SHUT_WR => SEND_SHUTDOWN,
+            ShutdownBit::SHUT_RDWR => SHUTDOWN_MASK,
+            _ => 0,
+        };
+
+        Shutdown {
+            bit: AtomicU8::new(initial),
+        }
+    }
+}
+
+impl Shutdown {
+    /// Creates a state in which neither direction is shut down.
+    pub fn new() -> Self {
+        let bit = AtomicU8::new(0);
+        Self { bit }
+    }
+
+    /// Marks the receive direction as shut down.
+    pub fn recv_shutdown(&self) {
+        use core::sync::atomic::Ordering;
+
+        self.bit.fetch_or(RCV_SHUTDOWN, Ordering::SeqCst);
+    }
+
+    /// Marks the send direction as shut down.
+    pub fn send_shutdown(&self) {
+        use core::sync::atomic::Ordering;
+
+        self.bit.fetch_or(SEND_SHUTDOWN, Ordering::SeqCst);
+    }
+
+    // pub fn is_recv_shutdown(&self) -> bool {
+    //     self.bit.load(core::sync::atomic::Ordering::SeqCst) & RCV_SHUTDOWN != 0
+    // }
+
+    // pub fn is_send_shutdown(&self) -> bool {
+    //     self.bit.load(core::sync::atomic::Ordering::SeqCst) & SEND_SHUTDOWN != 0
+    // }
+
+    // pub fn is_both_shutdown(&self) -> bool {
+    //     self.bit.load(core::sync::atomic::Ordering::SeqCst) & SHUTDOWN_MASK == SHUTDOWN_MASK
+    // }
+
+    // pub fn is_empty(&self) -> bool {
+    //     self.bit.load(core::sync::atomic::Ordering::SeqCst) == 0
+    // }
+
+    /// Builds the state for a `how` argument by truncating it to a
+    /// [`ShutdownBit`] and converting that.
+    pub fn from_how(how: usize) -> Self {
+        let requested = ShutdownBit::from_bits_truncate(how as u8);
+        requested.into()
+    }
+
+    /// Returns a plain snapshot of the current state.
+    pub fn get(&self) -> ShutdownTemp {
+        use core::sync::atomic::Ordering;
+
+        let bit = self.bit.load(Ordering::SeqCst);
+        ShutdownTemp { bit }
+    }
+}
+
+/// Non-atomic snapshot of a shutdown state.
+pub struct ShutdownTemp {
+    /// Combination of `RCV_SHUTDOWN` and `SEND_SHUTDOWN`; 0 means neither
+    /// direction is shut down.
+    bit: u8,
+}
+
+impl ShutdownTemp {
+    /// Returns `true` when the receive direction is shut down.
+    pub fn is_recv_shutdown(&self) -> bool {
+        self.bit & RCV_SHUTDOWN != 0
+    }
+
+    /// Returns `true` when the send direction is shut down.
+    pub fn is_send_shutdown(&self) -> bool {
+        self.bit & SEND_SHUTDOWN != 0
+    }
+
+    /// Returns `true` when both directions are shut down.
+    pub fn is_both_shutdown(&self) -> bool {
+        self.bit & SHUTDOWN_MASK == SHUTDOWN_MASK
+    }
+
+    /// Returns `true` when neither direction is shut down.
+    pub fn is_empty(&self) -> bool {
+        self.bit == 0
+    }
+
+    /// Converts a `how` argument (0 = read, 1 = write, 2 = both) into a
+    /// shutdown state.
+    ///
+    /// Any other value yields the empty state, the same fallback the
+    /// `From<ShutdownBit>` conversion uses. The previous `how as u8 + 1`
+    /// overflowed for `how` = 255 and produced a bit pattern outside
+    /// `SHUTDOWN_MASK` for every other out-of-range value.
+    pub fn from_how(how: usize) -> Self {
+        let bit = match how {
+            0 => RCV_SHUTDOWN,
+            1 => SEND_SHUTDOWN,
+            2 => SHUTDOWN_MASK,
+            _ => 0,
+        };
+
+        Self { bit }
+    }
+}
+
+impl From<ShutdownBit> for ShutdownTemp {
+    /// Builds the snapshot requested by `shutdown_bit`; any value other than
+    /// `SHUT_RD`, `SHUT_WR` or `SHUT_RDWR` yields the empty snapshot.
+    fn from(shutdown_bit: ShutdownBit) -> Self {
+        let bit = if shutdown_bit == ShutdownBit::SHUT_RD {
+            RCV_SHUTDOWN
+        } else if shutdown_bit == ShutdownBit::SHUT_WR {
+            SEND_SHUTDOWN
+        } else if shutdown_bit == ShutdownBit::SHUT_RDWR {
+            SHUTDOWN_MASK
+        } else {
+            0
+        };
+
+        Self { bit }
+    }
+}

+ 76 - 0
kernel/src/net/socket/define/ipproto.rs

@@ -0,0 +1,76 @@
+const SOL_SOCKET: u16 = 1;
+
+/// IP protocol numbers.
+///
+/// The discriminants are the numeric protocol values; note that `MPTCP`
+/// (262) does not fit into a `u8`, which is why conversions use `u16`.
+// NOTE(review): the values look like the Linux `IPPROTO_*` constants -- confirm against the uapi header.
+#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
+pub enum IPProtocol {
+    /// Dummy protocol for TCP.
+    IP = 0,
+    /// Internet Control Message Protocol.
+    ICMP = 1,
+    /// Internet Group Management Protocol.
+    IGMP = 2,
+    /// IPIP tunnels (older KA9Q tunnels use 94).
+    IPIP = 4,
+    /// Transmission Control Protocol.
+    TCP = 6,
+    /// Exterior Gateway Protocol.
+    EGP = 8,
+    /// PUP protocol.
+    PUP = 12,
+    /// User Datagram Protocol.
+    UDP = 17,
+    /// XNS IDP protocol.
+    IDP = 22,
+    /// SO Transport Protocol Class 4.
+    TP = 29,
+    /// Datagram Congestion Control Protocol.
+    DCCP = 33,
+    /// IPv6-in-IPv4 tunnelling.
+    IPv6 = 41,
+    /// RSVP Protocol.
+    RSVP = 46,
+    /// Generic Routing Encapsulation. (Cisco GRE) (rfc 1701, 1702)
+    GRE = 47,
+    /// Encapsulation Security Payload protocol
+    ESP = 50,
+    /// Authentication Header protocol
+    AH = 51,
+    /// Multicast Transport Protocol.
+    MTP = 92,
+    /// IP option pseudo header for BEET
+    BEETPH = 94,
+    /// Encapsulation Header.
+    ENCAP = 98,
+    /// Protocol Independent Multicast.
+    PIM = 103,
+    /// Compression Header Protocol.
+    COMP = 108,
+    /// Stream Control Transport Protocol
+    SCTP = 132,
+    /// UDP-Lite protocol (RFC 3828)
+    UDPLITE = 136,
+    /// MPLS in IP (RFC 4023)
+    MPLSINIP = 137,
+    /// Ethernet-within-IPv6 Encapsulation
+    ETHERNET = 143,
+    /// Raw IP packets
+    RAW = 255,
+    /// Multipath TCP connection
+    MPTCP = 262,
+}
+
+impl TryFrom<u16> for IPProtocol {
+    type Error = system_error::SystemError;
+
+    /// Maps a protocol number to its variant; numbers without a variant are
+    /// rejected with `EPROTONOSUPPORT`.
+    fn try_from(value: u16) -> Result<Self, Self::Error> {
+        <Self as num_traits::FromPrimitive>::from_u16(value)
+            .ok_or(system_error::SystemError::EPROTONOSUPPORT)
+    }
+}
+
+impl From<IPProtocol> for u16 {
+    /// Returns the protocol number of `value`.
+    fn from(value: IPProtocol) -> Self {
+        // Every discriminant of the field-less enum fits into a `u16`, so the
+        // cast yields the same number as the `ToPrimitive` conversion.
+        value as u16
+    }
+}

+ 32 - 0
kernel/src/net/socket/define/mod.rs

@@ -0,0 +1,32 @@
+mod option;
+pub use option::Options;
+
+mod option_level;
+pub use option_level::OptionsLevel;
+
+mod msg_flag;
+pub use msg_flag::MessageFlag;
+
+mod ipproto;
+pub use ipproto::IPProtocol;
+
+/// Socket type requested when a socket is created.
+// NOTE(review): the discriminants look like the Linux `SOCK_*` values -- confirm against the uapi header.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
+pub enum Type {
+    Stream = 1,
+    Datagram = 2,
+    Raw = 3,
+    RDM = 4,
+    SeqPacket = 5,
+    DCCP = 6,
+    Packet = 10,
+}
+
+use crate::net::syscall_util::SysArgSocketType;
+impl TryFrom<SysArgSocketType> for Type {
+    type Error = system_error::SystemError;
+    /// Extracts the socket type from the syscall argument; a value without a
+    /// matching variant is rejected with `EINVAL`.
+    fn try_from(x: SysArgSocketType) -> Result<Self, Self::Error> {
+        let raw = x.types().bits();
+        match <Self as num_traits::FromPrimitive>::from_u32(raw) {
+            Some(socket_type) => Ok(socket_type),
+            None => Err(system_error::SystemError::EINVAL),
+        }
+    }
+}

+ 110 - 0
kernel/src/net/socket/define/msg_flag.rs

@@ -0,0 +1,110 @@
+bitflags::bitflags! {
+    /// # Message Flags
+    /// Flags we can use with send and recv. \
+    /// Added those for 1003.1g; not all are supported yet.
+    ///
+    /// Some constants deliberately share a value (`TRYHARD`/`DONTROUTE`,
+    /// `SENDPAGE_NOPOLICY`/`WAITFORONE`, `EOF`/`FIN`), so they cannot be told
+    /// apart once set.
+    /// ## Reference
+    /// - [Linux Socket Flags](https://code.dragonos.org.cn/xref/linux-6.6.21/include/linux/socket.h#299)
+    pub struct MessageFlag: u32 {
+        /// `MSG_OOB`
+        /// `0b0000_0001`\
+        /// Process out-of-band data.
+        const OOB       = 1;
+        /// `MSG_PEEK`
+        /// `0b0000_0010`\
+        /// Peek at an incoming message.
+        const PEEK      = 2;
+        /// `MSG_DONTROUTE`
+        /// `0b0000_0100`\
+        /// Don't use routing tables.
+        const DONTROUTE = 4;
+        /// `MSG_TRYHARD`
+        /// `0b0000_0100`\
+        /// `MSG_TRYHARD` is not defined in the standard, but it is used in Linux.
+        /// It has the same value as `DONTROUTE`.
+        const TRYHARD   = 4;
+        /// `MSG_CTRUNC`
+        /// `0b0000_1000`\
+        /// Control data lost before delivery.
+        const CTRUNC     = 8;
+        /// `MSG_PROBE`
+        /// `0b0001_0000`\
+        const PROBE     = 0x10;
+        /// `MSG_TRUNC`
+        /// `0b0010_0000`\
+        /// Data truncated before delivery.
+        const TRUNC     = 0x20;
+        /// `MSG_DONTWAIT`
+        /// `0b0100_0000`\
+        /// Requests non-blocking behaviour for the operation.
+        const DONTWAIT  = 0x40;
+        /// `MSG_EOR`
+        /// `0b1000_0000`\
+        /// End of record.
+        const EOR       = 0x80;
+        /// `MSG_WAITALL`
+        /// `0b0001_0000_0000`\
+        /// Wait for full request or error.
+        const WAITALL   = 0x100;
+        /// `MSG_FIN`
+        /// `0b0010_0000_0000`\
+        /// Terminate the connection.
+        const FIN       = 0x200;
+        /// `MSG_SYN`
+        /// `0b0100_0000_0000`\
+        /// Synchronize sequence numbers.
+        const SYN       = 0x400;
+        /// `MSG_CONFIRM`
+        /// `0b1000_0000_0000`\
+        /// Confirm path validity.
+        const CONFIRM   = 0x800;
+        /// `MSG_RST`
+        /// `0b0001_0000_0000_0000`\
+        /// Reset the connection.
+        const RST       = 0x1000;
+        /// `MSG_ERRQUEUE`
+        /// `0b0010_0000_0000_0000`\
+        /// Fetch message from error queue.
+        const ERRQUEUE  = 0x2000;
+        /// `MSG_NOSIGNAL`
+        /// `0b0100_0000_0000_0000`\
+        /// Do not generate a signal.
+        const NOSIGNAL  = 0x4000;
+        /// `MSG_MORE`
+        /// `0b1000_0000_0000_0000`\
+        /// Sender will send more.
+        const MORE      = 0x8000;
+        /// `MSG_WAITFORONE`
+        /// `0b0001_0000_0000_0000_0000`\
+        /// For nonblocking operation.
+        const WAITFORONE = 0x10000;
+        /// `MSG_SENDPAGE_NOPOLICY`
+        /// `0b0001_0000_0000_0000_0000`\
+        /// Sendpage: do not apply policy.
+        /// It has the same value as `WAITFORONE`.
+        const SENDPAGE_NOPOLICY = 0x10000;
+        /// `MSG_BATCH`
+        /// `0b0100_0000_0000_0000_0000`\
+        /// Sendpage: next message is batch.
+        const BATCH     = 0x40000;
+        /// `MSG_EOF`
+        /// Alias of `FIN`.
+        const EOF       = Self::FIN.bits;
+        /// `MSG_NO_SHARED_FRAGS`
+        const NO_SHARED_FRAGS = 0x80000;
+        /// `MSG_SENDPAGE_DECRYPTED`
+        const SENDPAGE_DECRYPTED = 0x10_0000;
+
+        /// `MSG_ZEROCOPY`
+        const ZEROCOPY      = 0x400_0000;
+        /// `MSG_SPLICE_PAGES`
+        const SPLICE_PAGES  = 0x800_0000;
+        /// `MSG_FASTOPEN`
+        const FASTOPEN      = 0x2000_0000;
+        /// `MSG_CMSG_CLOEXEC`
+        const CMSG_CLOEXEC  = 0x4000_0000;
+        /// `MSG_CMSG_COMPAT`
+        /// Defined as 0 here, so this flag carries no bits; the value used
+        /// when `CONFIG_COMPAT` is defined is kept below for reference.
+        // if define CONFIG_COMPAT
+        // const CMSG_COMPAT   = 0x8000_0000;
+        const CMSG_COMPAT   = 0;
+        /// `MSG_INTERNAL_SENDMSG_FLAGS`
+        /// Union of `SPLICE_PAGES`, `SENDPAGE_NOPOLICY` and `SENDPAGE_DECRYPTED`.
+        const INTERNAL_SENDMSG_FLAGS
+            = Self::SPLICE_PAGES.bits | Self::SENDPAGE_NOPOLICY.bits | Self::SENDPAGE_DECRYPTED.bits;
+    }
+}

+ 92 - 0
kernel/src/net/socket/define/option.rs

@@ -0,0 +1,92 @@
+/// Socket option names.
+///
+/// The variants are not listed in numeric order; the discriminant of each
+/// variant is the option number.
+// NOTE(review): the values look like the Linux `SO_*` constants -- confirm against the uapi header.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
+#[allow(non_camel_case_types)]
+pub enum Options {
+    DEBUG = 1,
+    REUSEADDR = 2,
+    TYPE = 3,
+    ERROR = 4,
+    DONTROUTE = 5,
+    BROADCAST = 6,
+    SNDBUF = 7,
+    RCVBUF = 8,
+    SNDBUFFORCE = 32,
+    RCVBUFFORCE = 33,
+    KEEPALIVE = 9,
+    OOBINLINE = 10,
+    NO_CHECK = 11,
+    PRIORITY = 12,
+    LINGER = 13,
+    BSDCOMPAT = 14,
+    REUSEPORT = 15,
+    PASSCRED = 16,
+    PEERCRED = 17,
+    RCVLOWAT = 18,
+    SNDLOWAT = 19,
+    RCVTIMEO_OLD = 20,
+    SNDTIMEO_OLD = 21,
+    SECURITY_AUTHENTICATION = 22,
+    SECURITY_ENCRYPTION_TRANSPORT = 23,
+    SECURITY_ENCRYPTION_NETWORK = 24,
+    BINDTODEVICE = 25,
+    /// Same as GET_FILTER
+    ATTACH_FILTER = 26,
+    DETACH_FILTER = 27,
+    PEERNAME = 28,
+    ACCEPTCONN = 30,
+    PEERSEC = 31,
+    PASSSEC = 34,
+    MARK = 36,
+    PROTOCOL = 38,
+    DOMAIN = 39,
+    RXQ_OVFL = 40,
+    /// Same as SCM_WIFI_STATUS
+    WIFI_STATUS = 41,
+    PEEK_OFF = 42,
+    /* Instruct lower device to use last 4-bytes of skb data as FCS */
+    NOFCS = 43,
+    LOCK_FILTER = 44,
+    SELECT_ERR_QUEUE = 45,
+    BUSY_POLL = 46,
+    MAX_PACING_RATE = 47,
+    BPF_EXTENSIONS = 48,
+    INCOMING_CPU = 49,
+    ATTACH_BPF = 50,
+    // DETACH_BPF = DETACH_FILTER,
+    ATTACH_REUSEPORT_CBPF = 51,
+    ATTACH_REUSEPORT_EBPF = 52,
+    CNX_ADVICE = 53,
+    SCM_TIMESTAMPING_OPT_STATS = 54,
+    MEMINFO = 55,
+    INCOMING_NAPI_ID = 56,
+    COOKIE = 57,
+    SCM_TIMESTAMPING_PKTINFO = 58,
+    PEERGROUPS = 59,
+    ZEROCOPY = 60,
+    /// Same as SCM_TXTIME
+    TXTIME = 61,
+    BINDTOIFINDEX = 62,
+    TIMESTAMP_OLD = 29,
+    TIMESTAMPNS_OLD = 35,
+    TIMESTAMPING_OLD = 37,
+    TIMESTAMP_NEW = 63,
+    TIMESTAMPNS_NEW = 64,
+    TIMESTAMPING_NEW = 65,
+    RCVTIMEO_NEW = 66,
+    SNDTIMEO_NEW = 67,
+    DETACH_REUSEPORT_BPF = 68,
+    PREFER_BUSY_POLL = 69,
+    BUSY_POLL_BUDGET = 70,
+    NETNS_COOKIE = 71,
+    BUF_LOCK = 72,
+    RESERVE_MEM = 73,
+    TXREHASH = 74,
+    RCVMARK = 75,
+}
+
+impl TryFrom<u32> for Options {
+    type Error = system_error::SystemError;
+    /// Maps an option number to its variant; numbers without a variant are
+    /// rejected with `EINVAL`.
+    fn try_from(x: u32) -> Result<Self, Self::Error> {
+        match <Self as num_traits::FromPrimitive>::from_u32(x) {
+            Some(option) => Ok(option),
+            None => Err(system_error::SystemError::EINVAL),
+        }
+    }
+}

+ 115 - 0
kernel/src/net/socket/define/option_level.rs

@@ -0,0 +1,115 @@
+// pub const SOL_SOCKET: u8 = 1,
+// bitflags::bitflags! {
+//     pub struct OptionsLevel: u32 {
+//         const IP = 0,
+//         // const SOL_ICMP = 1, // No-no-no! Due to Linux :-) we cannot
+//         const SOCKET = 1,
+//         const TCP = 6,
+//         const UDP = 17,
+//         const IPV6 = 41,
+//         const ICMPV6 = 58,
+//         const SCTP = 132,
+//         const UDPLITE = 136, // UDP-Lite (RFC 3828)
+//         const RAW = 255,
+//         const IPX = 256,
+//         const AX25 = 257,
+//         const ATALK = 258,
+//         const NETROM = 259,
+//         const ROSE = 260,
+//         const DECNET = 261,
+//         const X25 = 262,
+//         const PACKET = 263,
+//         const ATM = 264, // ATM layer (cell level)
+//         const AAL = 265, // ATM Adaption Layer (packet level)
+//         const IRDA = 266,
+//         const NETBEUI = 267,
+//         const LLC = 268,
+//         const DCCP = 269,
+//         const NETLINK = 270,
+//         const TIPC = 271,
+//         const RXRPC = 272,
+//         const PPPOL2TP = 273,
+//         const BLUETOOTH = 274,
+//         const PNPIPE = 275,
+//         const RDS = 276,
+//         const IUCV = 277,
+//         const CAIF = 278,
+//         const ALG = 279,
+//         const NFC = 280,
+//         const KCM = 281,
+//         const TLS = 282,
+//         const XDP = 283,
+//         const MPTCP = 284,
+//         const MCTP = 285,
+//         const SMC = 286,
+//         const VSOCK = 287,
+//     }
+// }
+
+/// # SOL (Socket Option Level)
+/// Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx
+///
+/// Each variant's discriminant is the numeric level value. `FromPrimitive` and
+/// `ToPrimitive` are derived so the enum can be converted from and to integers.
+/// ## Reference
+/// - [Setsockoptions(2) level](https://code.dragonos.org.cn/xref/linux-6.6.21/include/linux/socket.h#345)
+#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
+#[allow(non_camel_case_types)]
+pub enum OptionsLevel {
+    IP = 0,
+    SOCKET = 1,
+    // ICMP = 1 would collide with SOCKET = 1, so it is left out (Linux header: "No-no-no! Due to Linux :-) we cannot")
+    TCP = 6,
+    UDP = 17,
+    IPV6 = 41,
+    ICMPV6 = 58,
+    SCTP = 132,
+    UDPLITE = 136, // UDP-Lite (RFC 3828)
+    RAW = 255,
+    IPX = 256,
+    AX25 = 257,
+    ATALK = 258,
+    NETROM = 259,
+    ROSE = 260,
+    DECNET = 261,
+    X25 = 262,
+    PACKET = 263,
+    ATM = 264, // ATM layer (cell level)
+    AAL = 265, // ATM Adaption Layer (packet level)
+    IRDA = 266,
+    NETBEUI = 267,
+    LLC = 268,
+    DCCP = 269,
+    NETLINK = 270,
+    TIPC = 271,
+    RXRPC = 272,
+    PPPOL2TP = 273,
+    BLUETOOTH = 274,
+    PNPIPE = 275,
+    RDS = 276,
+    IUCV = 277,
+    CAIF = 278,
+    ALG = 279,
+    NFC = 280,
+    KCM = 281,
+    TLS = 282,
+    XDP = 283,
+    MPTCP = 284,
+    MCTP = 285,
+    SMC = 286,
+    VSOCK = 287,
+}
+
+/// Fallible conversion from a raw `u32` level into an [`OptionsLevel`].
+///
+/// Levels that match no variant are rejected with `EPROTONOSUPPORT`.
+impl TryFrom<u32> for OptionsLevel {
+    type Error = system_error::SystemError;
+
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        use num_traits::FromPrimitive;
+
+        let level = <Self as FromPrimitive>::from_u32(value);
+        level.ok_or(system_error::SystemError::EPROTONOSUPPORT)
+    }
+}
+
+/// Yields the numeric level value of an [`OptionsLevel`].
+impl From<OptionsLevel> for u32 {
+    fn from(value: OptionsLevel) -> Self {
+        // Every discriminant is a small non-negative integer, so reading it
+        // directly gives the same number the derived `ToPrimitive` reports.
+        value as u32
+    }
+}

+ 133 - 0
kernel/src/net/socket/define/tmp.rs

@@ -0,0 +1,133 @@
+// bitflags! {
+//     // #[derive(PartialEq, Eq, Debug, Clone, Copy)]
+//     pub struct Options: u32 {
+//         const DEBUG = 1;
+//         const REUSEADDR = 2;
+//         const TYPE = 3;
+//         const ERROR = 4;
+//         const DONTROUTE = 5;
+//         const BROADCAST = 6;
+//         const SNDBUF = 7;
+//         const RCVBUF = 8;
+//         const SNDBUFFORCE = 32;
+//         const RCVBUFFORCE = 33;
+//         const KEEPALIVE = 9;
+//         const OOBINLINE = 10;
+//         const NO_CHECK = 11;
+//         const PRIORITY = 12;
+//         const LINGER = 13;
+//         const BSDCOMPAT = 14;
+//         const REUSEPORT = 15;
+//         const PASSCRED = 16;
+//         const PEERCRED = 17;
+//         const RCVLOWAT = 18;
+//         const SNDLOWAT = 19;
+//         const RCVTIMEO_OLD = 20;
+//         const SNDTIMEO_OLD = 21;
+//
+//         const SECURITY_AUTHENTICATION = 22;
+//         const SECURITY_ENCRYPTION_TRANSPORT = 23;
+//         const SECURITY_ENCRYPTION_NETWORK = 24;
+//
+//         const BINDTODEVICE = 25;
+//
+//         /// Same as GET_FILTER
+//         const ATTACH_FILTER = 26;
+//         const DETACH_FILTER = 27;
+//
+//         const PEERNAME = 28;
+//
+//         const ACCEPTCONN = 30;
+//
+//         const PEERSEC = 31;
+//         const PASSSEC = 34;
+//
+//         const MARK = 36;
+//
+//         const PROTOCOL = 38;
+//         const DOMAIN = 39;
+//
+//         const RXQ_OVFL = 40;
+//
+//         /// Same as SCM_WIFI_STATUS
+//         const WIFI_STATUS = 41;
+//         const PEEK_OFF = 42;
+//
+//         /* Instruct lower device to use last 4-bytes of skb data as FCS */
+//         const NOFCS = 43;
+//
+//         const LOCK_FILTER = 44;
+//         const SELECT_ERR_QUEUE = 45;
+//         const BUSY_POLL = 46;
+//         const MAX_PACING_RATE = 47;
+//         const BPF_EXTENSIONS = 48;
+//         const INCOMING_CPU = 49;
+//         const ATTACH_BPF = 50;
+//         // DETACH_BPF = DETACH_FILTER;
+//         const ATTACH_REUSEPORT_CBPF = 51;
+//         const ATTACH_REUSEPORT_EBPF = 52;
+//
+//         const CNX_ADVICE = 53;
+//         const SCM_TIMESTAMPING_OPT_STATS = 54;
+//         const MEMINFO = 55;
+//         const INCOMING_NAPI_ID = 56;
+//         const COOKIE = 57;
+//         const SCM_TIMESTAMPING_PKTINFO = 58;
+//         const PEERGROUPS = 59;
+//         const ZEROCOPY = 60;
+//         /// Same as SCM_TXTIME
+//         const TXTIME = 61;
+//
+//         const BINDTOIFINDEX = 62;
+//
+//         const TIMESTAMP_OLD = 29;
+//         const TIMESTAMPNS_OLD = 35;
+//         const TIMESTAMPING_OLD = 37;
+//         const TIMESTAMP_NEW = 63;
+//         const TIMESTAMPNS_NEW = 64;
+//         const TIMESTAMPING_NEW = 65;
+//
+//         const RCVTIMEO_NEW = 66;
+//         const SNDTIMEO_NEW = 67;
+//
+//         const DETACH_REUSEPORT_BPF = 68;
+//
+//         const PREFER_BUSY_POLL = 69;
+//         const BUSY_POLL_BUDGET = 70;
+//
+//         const NETNS_COOKIE = 71;
+//         const BUF_LOCK = 72;
+//         const RESERVE_MEM = 73;
+//         const TXREHASH = 74;
+//         const RCVMARK = 75;
+//     }
+// }
+
+
+// bitflags::bitflags! {
+//     pub struct Level: i32 {
+//         const SOL_SOCKET = 1;
+//         const IPPROTO_IP = super::ip::Protocol::IP.bits();
+//         const IPPROTO_IPV6 = super::ip::Protocol::IPv6.bits();
+//         const IPPROTO_TCP = super::ip::Protocol::TCP.bits();
+//     }
+// }
+
+
+
+// bitflags! {
+//     /// @brief Socket options
+//     #[derive(Default)]
+//     pub struct Options: u32 {
+//         /// Whether the socket blocks
+//         const BLOCK = 1 << 0;
+//         /// Whether broadcast is allowed
+//         const BROADCAST = 1 << 1;
+//         /// Whether multicast is allowed
+//         const MULTICAST = 1 << 2;
+//         /// Whether address reuse is allowed
+//         const REUSEADDR = 1 << 3;
+//         /// Whether port reuse is allowed
+//         const REUSEPORT = 1 << 4;
+//     }
+// }

+ 43 - 0
kernel/src/net/socket/endpoint.rs

@@ -0,0 +1,43 @@
+use crate::{filesystem::vfs::InodeId, net::socket};
+use alloc::{string::String, sync::Arc};
+
+pub use smoltcp::wire::IpEndpoint;
+pub use socket::netlink::endpoint::NetlinkEndpoint;
+
+/// An endpoint in one of the supported address spaces
+/// (link layer, IP, Unix inode/path, Netlink).
+#[derive(Debug, Clone)]
+pub enum Endpoint {
+    /// Link-layer endpoint
+    LinkLayer(LinkLayerEndpoint),
+    /// Network-layer endpoint
+    Ip(IpEndpoint),
+    /// Inode endpoint; the endpoint a Unix socket actually stores
+    Inode((Arc<socket::Inode>, String)),
+    /// Endpoint used by Unix sockets to pass an inode id index and a path
+    Unixpath((InodeId, String)),
+    /// NetLink endpoint
+    Netlink(NetlinkEndpoint),
+}
+
+/// @brief Link-layer endpoint
+#[derive(Debug, Clone)]
+pub struct LinkLayerEndpoint {
+    /// Interface number of the network card
+    pub interface: usize,
+}
+
+impl LinkLayerEndpoint {
+    /// Builds a link-layer endpoint.
+    ///
+    /// * `interface` - interface number of the network card
+    ///
+    /// Returns the newly built endpoint.
+    pub fn new(interface: usize) -> Self {
+        LinkLayerEndpoint { interface }
+    }
+}
+
+/// Wraps a smoltcp `IpEndpoint` in the [`Endpoint::Ip`] variant.
+impl From<IpEndpoint> for Endpoint {
+    fn from(endpoint: IpEndpoint) -> Self {
+        let wrapped = Endpoint::Ip(endpoint);
+        wrapped
+    }
+}

+ 121 - 0
kernel/src/net/socket/family.rs

@@ -0,0 +1,121 @@
+/// # AddressFamily
+/// Socket address families.
+/// ## Reference
+/// https://code.dragonos.org.cn/xref/linux-5.19.10/include/linux/socket.h#180
+#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
+pub enum AddressFamily {
+    /// AF_UNSPEC: address family not specified
+    Unspecified = 0,
+    /// AF_UNIX: Unix domain sockets (same as AF_LOCAL)
+    Unix = 1,
+    /// AF_INET: IPv4 sockets
+    INet = 2,
+    /// AF_AX25: AMPR AX.25 sockets
+    AX25 = 3,
+    /// AF_IPX: IPX sockets
+    IPX = 4,
+    /// AF_APPLETALK: Appletalk sockets
+    Appletalk = 5,
+    /// AF_NETROM: AMPR NET/ROM sockets
+    Netrom = 6,
+    /// AF_BRIDGE: multiprotocol bridge sockets
+    Bridge = 7,
+    /// AF_ATMPVC: ATM PVCs sockets
+    Atmpvc = 8,
+    /// AF_X25: X.25 sockets
+    X25 = 9,
+    /// AF_INET6: IPv6 sockets
+    INet6 = 10,
+    /// AF_ROSE: AMPR ROSE sockets
+    Rose = 11,
+    /// AF_DECnet Reserved for DECnet project
+    Decnet = 12,
+    /// AF_NETBEUI Reserved for 802.2LLC project
+    Netbeui = 13,
+    /// AF_SECURITY: pseudo address family for the security callback
+    Security = 14,
+    /// AF_KEY: Key management API
+    Key = 15,
+    /// AF_NETLINK: Netlink sockets
+    Netlink = 16,
+    /// AF_PACKET: Low level packet interface
+    Packet = 17,
+    /// AF_ASH: Ash
+    Ash = 18,
+    /// AF_ECONET: Acorn Econet
+    Econet = 19,
+    /// AF_ATMSVC: ATM SVCs
+    Atmsvc = 20,
+    /// AF_RDS: Reliable Datagram Sockets
+    Rds = 21,
+    /// AF_SNA: Linux SNA Project
+    Sna = 22,
+    /// AF_IRDA: IRDA sockets
+    Irda = 23,
+    /// AF_PPPOX: PPPoX sockets
+    Pppox = 24,
+    /// AF_WANPIPE: WANPIPE API sockets
+    WanPipe = 25,
+    /// AF_LLC: Linux LLC
+    Llc = 26,
+    /// AF_IB: Native InfiniBand address
+    /// Introduction: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/configuring_infiniband_and_rdma_networks/index#understanding-infiniband-and-rdma_configuring-infiniband-and-rdma-networks
+    Ib = 27,
+    /// AF_MPLS: MPLS
+    Mpls = 28,
+    /// AF_CAN: Controller Area Network
+    Can = 29,
+    /// AF_TIPC: TIPC sockets
+    Tipc = 30,
+    /// AF_BLUETOOTH: Bluetooth sockets
+    Bluetooth = 31,
+    /// AF_IUCV: IUCV sockets
+    Iucv = 32,
+    /// AF_RXRPC: RxRPC sockets
+    Rxrpc = 33,
+    /// AF_ISDN: mISDN sockets
+    Isdn = 34,
+    /// AF_PHONET: Phonet sockets
+    Phonet = 35,
+    /// AF_IEEE802154: IEEE 802.15.4 sockets
+    Ieee802154 = 36,
+    /// AF_CAIF: CAIF sockets
+    Caif = 37,
+    /// AF_ALG: Algorithm sockets
+    Alg = 38,
+    /// AF_NFC: NFC sockets
+    Nfc = 39,
+    /// AF_VSOCK: vSockets
+    Vsock = 40,
+    /// AF_KCM: Kernel Connection Multiplexor
+    Kcm = 41,
+    /// AF_QIPCRTR: Qualcomm IPC Router
+    Qipcrtr = 42,
+    /// AF_SMC: SMC-R sockets.
+    /// reserve number for PF_SMC protocol family that reuses AF_INET address family
+    Smc = 43,
+    /// AF_XDP: XDP sockets
+    Xdp = 44,
+    /// AF_MCTP: Management Component Transport Protocol
+    Mctp = 45,
+    /// AF_MAX: the largest address family
+    Max = 46,
+}
+
+use system_error::SystemError;
+
+/// Fallible conversion from a raw `u16` family number into an [`AddressFamily`].
+///
+/// Numbers that match no variant are rejected with `EINVAL`.
+impl core::convert::TryFrom<u16> for AddressFamily {
+    type Error = system_error::SystemError;
+
+    fn try_from(x: u16) -> Result<Self, Self::Error> {
+        match <Self as num_traits::FromPrimitive>::from_u16(x) {
+            Some(family) => Ok(family),
+            None => Err(system_error::SystemError::EINVAL),
+        }
+    }
+}
+
+use crate::net::socket;
+use alloc::sync::Arc;
+
+/// Interface for obtaining a socket inode from a socket type and protocol number.
+pub trait Family {
+    /// Associated function taking the socket type `stype` and the numeric `protocol`.
+    ///
+    /// Returns a shared `socket::Inode` on success, or a `SystemError` on failure.
+    fn socket(stype: socket::Type, protocol: u32) -> Result<Arc<socket::Inode>, SystemError>;
+}

+ 0 - 42
kernel/src/net/socket/handle.rs

@@ -1,42 +0,0 @@
-use ida::IdAllocator;
-use smoltcp::iface::SocketHandle;
-
-use crate::libs::spinlock::SpinLock;
-
-int_like!(KernelHandle, usize);
-
-/// # Socket handle management component
-/// It wraps smoltcp's SocketHandle in an extra layer to add more functionality.
-/// For example, automatically releasing the socket's resources and notifying other components of the system when the socket is closed.
-#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)]
-pub enum GlobalSocketHandle {
-    Smoltcp(SocketHandle),
-    Kernel(KernelHandle),
-}
-
-/// Spinlock-guarded id allocator used by `GlobalSocketHandle::new_kernel_handle`.
-static KERNEL_HANDLE_IDA: SpinLock<IdAllocator> =
-    SpinLock::new(IdAllocator::new(0, usize::MAX).unwrap());
-
-impl GlobalSocketHandle {
-    /// Wraps a smoltcp `SocketHandle` in the `Smoltcp` variant.
-    pub fn new_smoltcp_handle(handle: SocketHandle) -> Self {
-        return Self::Smoltcp(handle);
-    }
-
-    /// Allocates an id from `KERNEL_HANDLE_IDA` and wraps it in the `Kernel` variant.
-    ///
-    /// Panics (via `unwrap`) if the allocation fails.
-    pub fn new_kernel_handle() -> Self {
-        return Self::Kernel(KernelHandle::new(KERNEL_HANDLE_IDA.lock().alloc().unwrap()));
-    }
-
-    /// Returns the inner smoltcp handle, or `None` when this is a `Kernel` handle.
-    pub fn smoltcp_handle(&self) -> Option<SocketHandle> {
-        if let Self::Smoltcp(sh) = *self {
-            return Some(sh);
-        }
-        None
-    }
-
-    /// Returns the inner kernel handle, or `None` when this is a `Smoltcp` handle.
-    pub fn kernel_handle(&self) -> Option<KernelHandle> {
-        if let Self::Kernel(kh) = *self {
-            return Some(kh);
-        }
-        None
-    }
-}

+ 0 - 1012
kernel/src/net/socket/inet.rs

@@ -1,1012 +0,0 @@
-use alloc::{boxed::Box, sync::Arc, vec::Vec};
-use log::{error, warn};
-use smoltcp::{
-    socket::{raw, tcp, udp},
-    wire,
-};
-use system_error::SystemError;
-
-use crate::{
-    driver::net::NetDevice,
-    libs::rwlock::RwLock,
-    net::{
-        event_poll::EPollEventType, net_core::poll_ifaces, Endpoint, Protocol, ShutdownType,
-        NET_DEVICES,
-    },
-};
-
-use super::{
-    handle::GlobalSocketHandle, PosixSocketHandleItem, Socket, SocketHandleItem, SocketMetadata,
-    SocketOptions, SocketPollMethod, SocketType, HANDLE_MAP, PORT_MANAGER, SOCKET_SET,
-};
-
-/// @brief Represents a raw socket. Raw sockets bypass transport-layer protocols (such as TCP or UDP) and provide direct access to network-layer protocols (such as IP).
-///
-/// ref: https://man7.org/linux/man-pages/man7/raw.7.html
-#[derive(Debug, Clone)]
-pub struct RawSocket {
-    handle: GlobalSocketHandle,
-    /// Whether packets sent by the user include the IP header.
-    /// If true, packets sent by the user must include the IP header (i.e. the user supplies IP header + data).
-    /// If false, packets sent by the user do not include the IP header (i.e. the user supplies only the data).
-    header_included: bool,
-    /// Metadata of the socket
-    metadata: SocketMetadata,
-    posix_item: Arc<PosixSocketHandleItem>,
-}
-
-impl RawSocket {
-    /// Size of the metadata buffer
-    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
-    /// Default size of the receive buffer
-    pub const DEFAULT_RX_BUF_SIZE: usize = 64 * 1024;
-    /// Default size of the transmit buffer
-    pub const DEFAULT_TX_BUF_SIZE: usize = 64 * 1024;
-
-    /// @brief Create a raw socket
-    ///
-    /// @param protocol Protocol number
-    /// @param options Options of the socket
-    ///
-    /// @return The newly created raw socket
-    pub fn new(protocol: Protocol, options: SocketOptions) -> Self {
-        let rx_buffer = raw::PacketBuffer::new(
-            vec![raw::PacketMetadata::EMPTY; Self::DEFAULT_METADATA_BUF_SIZE],
-            vec![0; Self::DEFAULT_RX_BUF_SIZE],
-        );
-        let tx_buffer = raw::PacketBuffer::new(
-            vec![raw::PacketMetadata::EMPTY; Self::DEFAULT_METADATA_BUF_SIZE],
-            vec![0; Self::DEFAULT_TX_BUF_SIZE],
-        );
-        let protocol: u8 = protocol.into();
-        let socket = raw::Socket::new(
-            wire::IpVersion::Ipv4,
-            wire::IpProtocol::from(protocol),
-            rx_buffer,
-            tx_buffer,
-        );
-
-        // Add the socket to the socket set and obtain its handle
-        let handle = GlobalSocketHandle::new_smoltcp_handle(SOCKET_SET.lock_irqsave().add(socket));
-
-        let metadata = SocketMetadata::new(
-            SocketType::Raw,
-            Self::DEFAULT_RX_BUF_SIZE,
-            Self::DEFAULT_TX_BUF_SIZE,
-            Self::DEFAULT_METADATA_BUF_SIZE,
-            options,
-        );
-
-        let posix_item = Arc::new(PosixSocketHandleItem::new(None));
-
-        return Self {
-            handle,
-            header_included: false,
-            metadata,
-            posix_item,
-        };
-    }
-}
-
-impl Socket for RawSocket {
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem> {
-        self.posix_item.clone()
-    }
-
-    fn close(&mut self) {
-        let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-        // NOTE(review): this pattern matches the Udp variant although `new()` adds a raw::Socket
-        // to the set, so the inner `close()` looks unreachable here; confirm the intent.
-        if let smoltcp::socket::Socket::Udp(mut sock) =
-            socket_set_guard.remove(self.handle.smoltcp_handle().unwrap())
-        {
-            sock.close();
-        }
-        drop(socket_set_guard);
-        poll_ifaces();
-    }
-
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint) {
-        poll_ifaces();
-        loop {
-            // How can this be optimized?
-            let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-            let socket =
-                socket_set_guard.get_mut::<raw::Socket>(self.handle.smoltcp_handle().unwrap());
-
-            match socket.recv_slice(buf) {
-                Ok(len) => {
-                    let packet = wire::Ipv4Packet::new_unchecked(buf);
-                    return (
-                        Ok(len),
-                        Endpoint::Ip(Some(wire::IpEndpoint {
-                            addr: wire::IpAddress::Ipv4(packet.src_addr()),
-                            port: 0,
-                        })),
-                    );
-                }
-                Err(_) => {
-                    if !self.metadata.options.contains(SocketOptions::BLOCK) {
-                        // For a non-blocking socket, return an error
-                        return (Err(SystemError::EAGAIN_OR_EWOULDBLOCK), Endpoint::Ip(None));
-                    }
-                }
-            }
-            drop(socket_set_guard);
-            self.posix_item.sleep(EPollEventType::EPOLLIN.bits() as u64);
-        }
-    }
-
-    fn write(&self, buf: &[u8], to: Option<Endpoint>) -> Result<usize, SystemError> {
-        // If the user's packet already includes the IP header, send it as is
-        if self.header_included {
-            let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-            let socket =
-                socket_set_guard.get_mut::<raw::Socket>(self.handle.smoltcp_handle().unwrap());
-            match socket.send_slice(buf) {
-                Ok(_) => {
-                    return Ok(buf.len());
-                }
-                Err(raw::SendError::BufferFull) => {
-                    return Err(SystemError::ENOBUFS);
-                }
-            }
-        } else {
-            // If the user's packet does not include the IP header, the IP header must be built here
-
-            if let Some(Endpoint::Ip(Some(endpoint))) = to {
-                let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-                let socket: &mut raw::Socket =
-                    socket_set_guard.get_mut::<raw::Socket>(self.handle.smoltcp_handle().unwrap());
-
-                // Brute-force approach: only NIC 0 is considered. TODO: handle the multi-NIC case!!!
-                let iface = NET_DEVICES.read_irqsave().get(&0).unwrap().clone();
-
-                // Build the IP header
-                let ipv4_src_addr: Option<wire::Ipv4Address> =
-                    iface.inner_iface().lock().ipv4_addr();
-                if ipv4_src_addr.is_none() {
-                    return Err(SystemError::ENETUNREACH);
-                }
-                let ipv4_src_addr = ipv4_src_addr.unwrap();
-
-                if let wire::IpAddress::Ipv4(ipv4_dst) = endpoint.addr {
-                    let len = buf.len();
-
-                    // Create the 20-byte IPv4 header
-                    let mut buffer: Vec<u8> = vec![0u8; len + 20];
-                    let mut packet: wire::Ipv4Packet<&mut Vec<u8>> =
-                        wire::Ipv4Packet::new_unchecked(&mut buffer);
-
-                    // Fill in the ipv4 header
-                    packet.set_version(4);
-                    packet.set_header_len(20);
-                    packet.set_total_len((20 + len) as u16);
-                    packet.set_src_addr(ipv4_src_addr);
-                    packet.set_dst_addr(ipv4_dst);
-
-                    // Set the protocol field of the ipv4 header
-                    packet.set_next_header(socket.ip_protocol());
-
-                    // Get the payload field of the IP packet
-                    let payload: &mut [u8] = packet.payload_mut();
-                    payload.copy_from_slice(buf);
-
-                    // Fill in the checksum field
-                    packet.fill_checksum();
-
-                    // Send the packet
-                    socket.send_slice(&buffer).unwrap();
-
-                    iface.poll(&mut socket_set_guard).ok();
-
-                    drop(socket_set_guard);
-                    return Ok(len);
-                } else {
-                    warn!("Unsupport Ip protocol type!");
-                    return Err(SystemError::EINVAL);
-                }
-            } else {
-                // If no destination address was specified, return an error
-                return Err(SystemError::ENOTCONN);
-            }
-        }
-    }
-
-    fn connect(&mut self, _endpoint: Endpoint) -> Result<(), SystemError> {
-        Ok(())
-    }
-
-    fn metadata(&self) -> SocketMetadata {
-        self.metadata.clone()
-    }
-
-    fn box_clone(&self) -> Box<dyn Socket> {
-        Box::new(self.clone())
-    }
-
-    fn socket_handle(&self) -> GlobalSocketHandle {
-        self.handle
-    }
-
-    fn as_any_ref(&self) -> &dyn core::any::Any {
-        self
-    }
-
-    fn as_any_mut(&mut self) -> &mut dyn core::any::Any {
-        self
-    }
-}
-
-/// @brief Represents a udp socket
-///
-/// https://man7.org/linux/man-pages/man7/udp.7.html
-#[derive(Debug, Clone)]
-pub struct UdpSocket {
-    pub handle: GlobalSocketHandle,
-    remote_endpoint: Option<Endpoint>, // Remote endpoint recorded for connect(); should be an IP address.
-    metadata: SocketMetadata,
-    posix_item: Arc<PosixSocketHandleItem>,
-}
-
-impl UdpSocket {
-    /// Size of the metadata buffer
-    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
-    /// Default size of the receive buffer
-    pub const DEFAULT_RX_BUF_SIZE: usize = 64 * 1024;
-    /// Default size of the transmit buffer
-    pub const DEFAULT_TX_BUF_SIZE: usize = 64 * 1024;
-
-    /// @brief Create a udp socket
-    ///
-    /// @param options Options of the socket
-    ///
-    /// @return The newly created udp socket
-    pub fn new(options: SocketOptions) -> Self {
-        let rx_buffer = udp::PacketBuffer::new(
-            vec![udp::PacketMetadata::EMPTY; Self::DEFAULT_METADATA_BUF_SIZE],
-            vec![0; Self::DEFAULT_RX_BUF_SIZE],
-        );
-        let tx_buffer = udp::PacketBuffer::new(
-            vec![udp::PacketMetadata::EMPTY; Self::DEFAULT_METADATA_BUF_SIZE],
-            vec![0; Self::DEFAULT_TX_BUF_SIZE],
-        );
-        let socket = udp::Socket::new(rx_buffer, tx_buffer);
-
-        // Add the socket to the socket set and obtain its handle
-        let handle: GlobalSocketHandle =
-            GlobalSocketHandle::new_smoltcp_handle(SOCKET_SET.lock_irqsave().add(socket));
-
-        let metadata = SocketMetadata::new(
-            SocketType::Udp,
-            Self::DEFAULT_RX_BUF_SIZE,
-            Self::DEFAULT_TX_BUF_SIZE,
-            Self::DEFAULT_METADATA_BUF_SIZE,
-            options,
-        );
-
-        let posix_item = Arc::new(PosixSocketHandleItem::new(None));
-
-        return Self {
-            handle,
-            remote_endpoint: None,
-            metadata,
-            posix_item,
-        };
-    }
-
-    fn do_bind(&self, socket: &mut udp::Socket, endpoint: Endpoint) -> Result<(), SystemError> {
-        if let Endpoint::Ip(Some(mut ip)) = endpoint {
-            // If the port is 0, allocate a random port
-            if ip.port == 0 {
-                ip.port = PORT_MANAGER.get_ephemeral_port(self.metadata.socket_type)?;
-            }
-            // Check whether the port is already in use
-            PORT_MANAGER.bind_port(self.metadata.socket_type, ip.port)?;
-
-            let bind_res = if ip.addr.is_unspecified() {
-                socket.bind(ip.port)
-            } else {
-                socket.bind(ip)
-            };
-
-            match bind_res {
-                Ok(()) => return Ok(()),
-                Err(_) => return Err(SystemError::EINVAL),
-            }
-        } else {
-            return Err(SystemError::EINVAL);
-        }
-    }
-}
-
-impl Socket for UdpSocket {
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem> {
-        self.posix_item.clone()
-    }
-
-    fn close(&mut self) {
-        let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-        if let smoltcp::socket::Socket::Udp(mut sock) =
-            socket_set_guard.remove(self.handle.smoltcp_handle().unwrap())
-        {
-            sock.close();
-        }
-        drop(socket_set_guard);
-        poll_ifaces();
-    }
-
-    /// @brief Bind to a specific local port before calling read
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint) {
-        loop {
-            // debug!("Wait22 to Read");
-            poll_ifaces();
-            let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-            let socket =
-                socket_set_guard.get_mut::<udp::Socket>(self.handle.smoltcp_handle().unwrap());
-
-            // debug!("Wait to Read");
-
-            if socket.can_recv() {
-                if let Ok((size, metadata)) = socket.recv_slice(buf) {
-                    drop(socket_set_guard);
-                    poll_ifaces();
-                    return (Ok(size), Endpoint::Ip(Some(metadata.endpoint)));
-                }
-            } else {
-                // If the socket is not connected, busy-wait
-                // return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-            }
-            drop(socket_set_guard);
-            self.posix_item.sleep(EPollEventType::EPOLLIN.bits() as u64);
-        }
-    }
-
-    fn write(&self, buf: &[u8], to: Option<Endpoint>) -> Result<usize, SystemError> {
-        // debug!("udp to send: {:?}, len={}", to, buf.len());
-        let remote_endpoint: &wire::IpEndpoint = {
-            if let Some(Endpoint::Ip(Some(ref endpoint))) = to {
-                endpoint
-            } else if let Some(Endpoint::Ip(Some(ref endpoint))) = self.remote_endpoint {
-                endpoint
-            } else {
-                return Err(SystemError::ENOTCONN);
-            }
-        };
-        // debug!("udp write: remote = {:?}", remote_endpoint);
-
-        let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-        let socket = socket_set_guard.get_mut::<udp::Socket>(self.handle.smoltcp_handle().unwrap());
-        // debug!("is open()={}", socket.is_open());
-        // debug!("socket endpoint={:?}", socket.endpoint());
-        if socket.can_send() {
-            // debug!("udp write: can send");
-            match socket.send_slice(buf, *remote_endpoint) {
-                Ok(()) => {
-                    // debug!("udp write: send ok");
-                    drop(socket_set_guard);
-                    poll_ifaces();
-                    return Ok(buf.len());
-                }
-                Err(_) => {
-                    // debug!("udp write: send err");
-                    return Err(SystemError::ENOBUFS);
-                }
-            }
-        } else {
-            // debug!("udp write: can not send");
-            return Err(SystemError::ENOBUFS);
-        };
-    }
-
-    fn bind(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        let mut sockets = SOCKET_SET.lock_irqsave();
-        let socket = sockets.get_mut::<udp::Socket>(self.handle.smoltcp_handle().unwrap());
-        // debug!("UDP Bind to {:?}", endpoint);
-        return self.do_bind(socket, endpoint);
-    }
-
-    fn poll(&self) -> EPollEventType {
-        let sockets = SOCKET_SET.lock_irqsave();
-        let socket = sockets.get::<udp::Socket>(self.handle.smoltcp_handle().unwrap());
-
-        return SocketPollMethod::udp_poll(
-            socket,
-            HANDLE_MAP
-                .read_irqsave()
-                .get(&self.socket_handle())
-                .unwrap()
-                .shutdown_type(),
-        );
-    }
-
-    fn connect(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        if let Endpoint::Ip(_) = endpoint {
-            self.remote_endpoint = Some(endpoint);
-            Ok(())
-        } else {
-            Err(SystemError::EINVAL)
-        }
-    }
-
-    fn ioctl(
-        &self,
-        _cmd: usize,
-        _arg0: usize,
-        _arg1: usize,
-        _arg2: usize,
-    ) -> Result<usize, SystemError> {
-        todo!()
-    }
-
-    fn metadata(&self) -> SocketMetadata {
-        self.metadata.clone()
-    }
-
-    fn box_clone(&self) -> Box<dyn Socket> {
-        return Box::new(self.clone());
-    }
-
-    fn endpoint(&self) -> Option<Endpoint> {
-        let sockets = SOCKET_SET.lock_irqsave();
-        let socket = sockets.get::<udp::Socket>(self.handle.smoltcp_handle().unwrap());
-        let listen_endpoint = socket.endpoint();
-
-        if listen_endpoint.port == 0 {
-            return None;
-        } else {
-            // If listen_endpoint's address is None, it means "listen on all addresses".
-            // Here we assume that all addresses are ipv4
-            // TODO: support ipv6
-            let result = wire::IpEndpoint::new(
-                listen_endpoint
-                    .addr
-                    .unwrap_or(wire::IpAddress::v4(0, 0, 0, 0)),
-                listen_endpoint.port,
-            );
-            return Some(Endpoint::Ip(Some(result)));
-        }
-    }
-
-    fn peer_endpoint(&self) -> Option<Endpoint> {
-        return self.remote_endpoint.clone();
-    }
-
-    fn socket_handle(&self) -> GlobalSocketHandle {
-        self.handle
-    }
-
-    fn as_any_ref(&self) -> &dyn core::any::Any {
-        self
-    }
-
-    fn as_any_mut(&mut self) -> &mut dyn core::any::Any {
-        self
-    }
-}
-
-/// @brief Represents a tcp socket
-///
-/// https://man7.org/linux/man-pages/man7/tcp.7.html
-#[derive(Debug, Clone)]
-pub struct TcpSocket {
-    handles: Vec<GlobalSocketHandle>,
-    local_endpoint: Option<wire::IpEndpoint>, // save local endpoint for bind()
-    is_listening: bool,
-    metadata: SocketMetadata,
-    posix_item: Arc<PosixSocketHandleItem>,
-}
-
-impl TcpSocket {
-    /// 元数据的缓冲区的大小
-    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
-    /// 默认的接收缓冲区的大小 receive
-    pub const DEFAULT_RX_BUF_SIZE: usize = 512 * 1024;
-    /// 默认的发送缓冲区的大小 transmiss
-    pub const DEFAULT_TX_BUF_SIZE: usize = 512 * 1024;
-
-    /// TcpSocket的特殊事件,用于在事件等待队列上sleep
-    pub const CAN_CONNECT: u64 = 1u64 << 63;
-    pub const CAN_ACCPET: u64 = 1u64 << 62;
-
-    /// @brief 创建一个tcp的socket
-    ///
-    /// @param options socket的选项
-    ///
-    /// @return 返回创建的tcp的socket
-    pub fn new(options: SocketOptions) -> Self {
-        // 创建handles数组并把socket添加到socket集合中,并得到socket的句柄
-        let handles: Vec<GlobalSocketHandle> = vec![GlobalSocketHandle::new_smoltcp_handle(
-            SOCKET_SET.lock_irqsave().add(Self::create_new_socket()),
-        )];
-
-        let metadata = SocketMetadata::new(
-            SocketType::Tcp,
-            Self::DEFAULT_RX_BUF_SIZE,
-            Self::DEFAULT_TX_BUF_SIZE,
-            Self::DEFAULT_METADATA_BUF_SIZE,
-            options,
-        );
-        let posix_item = Arc::new(PosixSocketHandleItem::new(None));
-        // debug!("when there's a new tcp socket,its'len: {}",handles.len());
-
-        return Self {
-            handles,
-            local_endpoint: None,
-            is_listening: false,
-            metadata,
-            posix_item,
-        };
-    }
-
-    fn do_listen(
-        &mut self,
-        socket: &mut tcp::Socket,
-        local_endpoint: wire::IpEndpoint,
-    ) -> Result<(), SystemError> {
-        let listen_result = if local_endpoint.addr.is_unspecified() {
-            socket.listen(local_endpoint.port)
-        } else {
-            socket.listen(local_endpoint)
-        };
-        return match listen_result {
-            Ok(()) => {
-                // debug!(
-                //     "Tcp Socket Listen on {local_endpoint}, open?:{}",
-                //     socket.is_open()
-                // );
-                self.is_listening = true;
-
-                Ok(())
-            }
-            Err(_) => Err(SystemError::EINVAL),
-        };
-    }
-
-    /// # create_new_socket - create a new TCP socket
-    ///
-    /// Builds a fresh smoltcp TCP socket whose receive and transmit buffers are
-    /// zero-filled and sized by `DEFAULT_RX_BUF_SIZE` / `DEFAULT_TX_BUF_SIZE`.
-    fn create_new_socket() -> tcp::Socket<'static> {
-        let rx_storage = vec![0u8; Self::DEFAULT_RX_BUF_SIZE];
-        let tx_storage = vec![0u8; Self::DEFAULT_TX_BUF_SIZE];
-        tcp::Socket::new(
-            tcp::SocketBuffer::new(rx_storage),
-            tcp::SocketBuffer::new(tx_storage),
-        )
-    }
-
-    /// Poll helper for a posix socket in the listening state, which needs special handling.
-    ///
-    /// Reports `EPOLL_LISTEN_CAN_ACCEPT` when at least one of the underlying smoltcp
-    /// sockets is active, and an empty event set otherwise. Handles that are not
-    /// smoltcp handles are ignored.
-    fn tcp_poll_listening(&self) -> EPollEventType {
-        let sockets = SOCKET_SET.lock_irqsave();
-
-        let has_pending_connection = self
-            .handles
-            .iter()
-            .filter_map(|h| h.smoltcp_handle())
-            .any(|sh| sockets.get::<tcp::Socket>(sh).is_active());
-
-        if has_pending_connection {
-            EPollEventType::EPOLL_LISTEN_CAN_ACCEPT
-        } else {
-            EPollEventType::empty()
-        }
-    }
-}
-
-impl Socket for TcpSocket {
-    /// Returns another `Arc` reference to this socket's `PosixSocketHandleItem`.
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem> {
-        self.posix_item.clone()
-    }
-
-    /// Closes every smoltcp socket owned by this TCP socket and removes it from `SOCKET_SET`.
-    ///
-    /// # Panics
-    /// Panics if a handle is not a smoltcp handle (`smoltcp_handle()` returns `None`).
-    fn close(&mut self) {
-        for handle in self.handles.iter() {
-            {
-                let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-                let smoltcp_handle = handle.smoltcp_handle().unwrap();
-                socket_set_guard
-                    .get_mut::<smoltcp::socket::tcp::Socket>(smoltcp_handle)
-                    .close();
-                drop(socket_set_guard);
-            }
-            // Poll the interfaces with the socket-set lock released, before the socket is
-            // removed (presumably so the close can be processed by the stack — TODO confirm).
-            poll_ifaces();
-            SOCKET_SET
-                .lock_irqsave()
-                .remove(handle.smoltcp_handle().unwrap());
-            // debug!("[Socket] [TCP] Close: {:?}", handle);
-        }
-    }
-
-    /// Reads received data into `buf`, sleeping until data is available.
-    ///
-    /// Returns `(Ok(size), Endpoint::Ip(Some(remote)))` once at least one byte was read.
-    /// Every failure is reported as `(Err(ENOTCONN), Endpoint::Ip(None))`: the read side is
-    /// already shut down, the socket is not active, it cannot receive, it has no remote
-    /// endpoint, or smoltcp reports `InvalidState` / `Finished`.
-    ///
-    /// # Panics
-    /// Panics if this socket has no entry in `HANDLE_MAP` or no smoltcp handle.
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint) {
-        if HANDLE_MAP
-            .read_irqsave()
-            .get(&self.socket_handle())
-            .unwrap()
-            .shutdown_type()
-            .contains(ShutdownType::RCV_SHUTDOWN)
-        {
-            return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-        }
-        // debug!("tcp socket: read, buf len={}", buf.len());
-        // debug!("tcp socket:read, socket'len={}",self.handle.len());
-        loop {
-            poll_ifaces();
-            let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-
-            let socket = socket_set_guard
-                .get_mut::<tcp::Socket>(self.handles.first().unwrap().smoltcp_handle().unwrap());
-
-            // If the socket is already closed, return an error.
-            if !socket.is_active() {
-                // debug!("Tcp Socket Read Error, socket is closed");
-                return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-            }
-
-            if socket.may_recv() {
-                match socket.recv_slice(buf) {
-                    Ok(size) => {
-                        // A zero-length read falls through to the sleep at the end of the loop.
-                        if size > 0 {
-                            let endpoint = if let Some(p) = socket.remote_endpoint() {
-                                p
-                            } else {
-                                return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-                            };
-
-                            // Release the socket set before polling the interfaces again.
-                            drop(socket_set_guard);
-                            poll_ifaces();
-                            return (Ok(size), Endpoint::Ip(Some(endpoint)));
-                        }
-                    }
-                    Err(tcp::RecvError::InvalidState) => {
-                        warn!("Tcp Socket Read Error, InvalidState");
-                        return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-                    }
-                    Err(tcp::RecvError::Finished) => {
-                        // The peer has closed its write side, so we should close our read side.
-                        HANDLE_MAP
-                            .write_irqsave()
-                            .get_mut(&self.socket_handle())
-                            .unwrap()
-                            .shutdown_type_writer()
-                            .insert(ShutdownType::RCV_SHUTDOWN);
-                        return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-                    }
-                }
-            } else {
-                return (Err(SystemError::ENOTCONN), Endpoint::Ip(None));
-            }
-            // Nothing was read: release the lock and sleep until EPOLLIN or EPOLLHUP.
-            drop(socket_set_guard);
-            self.posix_item
-                .sleep((EPollEventType::EPOLLIN | EPollEventType::EPOLLHUP).bits() as u64);
-        }
-    }
-
-    /// Queues `buf` for transmission on the connected socket; `_to` is ignored.
-    ///
-    /// Returns the number of bytes accepted by smoltcp's `send_slice`.
-    ///
-    /// # Errors
-    /// * `ENOTCONN` - the shutdown check below fails, or the socket is not open.
-    /// * `ENOBUFS` - the socket cannot send right now, or `send_slice` failed.
-    ///
-    /// # Panics
-    /// Panics if this socket has no entry in `HANDLE_MAP` or no smoltcp handle.
-    fn write(&self, buf: &[u8], _to: Option<Endpoint>) -> Result<usize, SystemError> {
-        // NOTE(review): this tests `RCV_SHUTDOWN`, the same flag `read` tests; a write path
-        // would be expected to test the send-side shutdown flag instead — confirm.
-        if HANDLE_MAP
-            .read_irqsave()
-            .get(&self.socket_handle())
-            .unwrap()
-            .shutdown_type()
-            .contains(ShutdownType::RCV_SHUTDOWN)
-        {
-            return Err(SystemError::ENOTCONN);
-        }
-        // debug!("tcp socket:write, socket'len={}",self.handle.len());
-
-        let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-
-        let socket = socket_set_guard
-            .get_mut::<tcp::Socket>(self.handles.first().unwrap().smoltcp_handle().unwrap());
-
-        if socket.is_open() {
-            if socket.can_send() {
-                match socket.send_slice(buf) {
-                    Ok(size) => {
-                        // Release the socket set before polling the interfaces.
-                        drop(socket_set_guard);
-                        poll_ifaces();
-                        return Ok(size);
-                    }
-                    Err(e) => {
-                        error!("Tcp Socket Write Error {e:?}");
-                        return Err(SystemError::ENOBUFS);
-                    }
-                }
-            } else {
-                return Err(SystemError::ENOBUFS);
-            }
-        }
-
-        return Err(SystemError::ENOTCONN);
-    }
-
-    /// Computes the current epoll event set for this socket.
-    ///
-    /// A listening socket is answered by `tcp_poll_listening`; any other socket is
-    /// answered by `SocketPollMethod::tcp_poll` using its single smoltcp socket.
-    ///
-    /// # Panics
-    /// Panics if a non-listening socket does not own exactly one handle, or if the
-    /// socket has no entry in `HANDLE_MAP`.
-    fn poll(&self) -> EPollEventType {
-        // Fast path for the listening state.
-        if self.is_listening {
-            return self.tcp_poll_listening();
-        }
-        // The listening state was handled above, so only the non-listening state is handled
-        // here; in that case there is exactly one handle.
-
-        assert!(self.handles.len() == 1);
-
-        let mut socket_set_guard = SOCKET_SET.lock_irqsave();
-        // debug!("tcp socket:poll, socket'len={}",self.handle.len());
-
-        let socket = socket_set_guard
-            .get_mut::<tcp::Socket>(self.handles.first().unwrap().smoltcp_handle().unwrap());
-        let handle_map_guard = HANDLE_MAP.read_irqsave();
-        let handle_item = handle_map_guard.get(&self.socket_handle()).unwrap();
-        let shutdown_type = handle_item.shutdown_type();
-        let is_posix_listen = handle_item.is_posix_listen;
-        // The values needed from the handle map were copied out; release it before polling.
-        drop(handle_map_guard);
-
-        return SocketPollMethod::tcp_poll(socket, shutdown_type, is_posix_listen);
-    }
-
-    /// Connects to the remote IP `endpoint` from a freshly reserved ephemeral local port,
-    /// sleeping until the handshake finishes.
-    ///
-    /// # Errors
-    /// * `EINVAL` - `endpoint` is not `Endpoint::Ip(Some(_))`.
-    /// * `EISCONN` - smoltcp reports `ConnectError::InvalidState`.
-    /// * `EADDRNOTAVAIL` - smoltcp reports `ConnectError::Unaddressable`.
-    /// * `ECONNREFUSED` - the socket left `SynSent` for a state other than `Established`.
-    /// * Any error returned by `PORT_MANAGER` while picking or reserving the port.
-    ///
-    /// # Panics
-    /// Panics if device `0` is missing from `NET_DEVICES` or the socket has no smoltcp handle.
-    fn connect(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        let mut sockets = SOCKET_SET.lock_irqsave();
-        // debug!("tcp socket:connect, socket'len={}", self.handles.len());
-
-        let socket =
-            sockets.get_mut::<tcp::Socket>(self.handles.first().unwrap().smoltcp_handle().unwrap());
-
-        if let Endpoint::Ip(Some(ip)) = endpoint {
-            let temp_port = PORT_MANAGER.get_ephemeral_port(self.metadata.socket_type)?;
-            // Check whether the port is already in use (this also reserves it).
-            // NOTE(review): no error path below releases this reservation — confirm whether
-            // it is released elsewhere.
-            PORT_MANAGER.bind_port(self.metadata.socket_type, temp_port)?;
-
-            // debug!("temp_port: {}", temp_port);
-            let iface: Arc<dyn NetDevice> = NET_DEVICES.write_irqsave().get(&0).unwrap().clone();
-            let mut inner_iface = iface.inner_iface().lock();
-            // debug!("to connect: {ip:?}");
-
-            match socket.connect(inner_iface.context(), ip, temp_port) {
-                Ok(()) => {
-                    // avoid deadlock
-                    drop(inner_iface);
-                    drop(iface);
-                    drop(sockets);
-                    // Wait for the handshake: poll, re-check the state, sleep while in SynSent.
-                    loop {
-                        poll_ifaces();
-                        let mut sockets = SOCKET_SET.lock_irqsave();
-                        let socket = sockets.get_mut::<tcp::Socket>(
-                            self.handles.first().unwrap().smoltcp_handle().unwrap(),
-                        );
-
-                        match socket.state() {
-                            tcp::State::Established => {
-                                return Ok(());
-                            }
-                            tcp::State::SynSent => {
-                                // Release the socket set before sleeping.
-                                drop(sockets);
-                                self.posix_item.sleep(Self::CAN_CONNECT);
-                            }
-                            _ => {
-                                return Err(SystemError::ECONNREFUSED);
-                            }
-                        }
-                    }
-                }
-                Err(e) => {
-                    // error!("Tcp Socket Connect Error {e:?}");
-                    match e {
-                        tcp::ConnectError::InvalidState => return Err(SystemError::EISCONN),
-                        tcp::ConnectError::Unaddressable => return Err(SystemError::EADDRNOTAVAIL),
-                    }
-                }
-            }
-        } else {
-            return Err(SystemError::EINVAL);
-        }
-    }
-
-    /// @brief Makes the TCP socket listen on `local_endpoint`.
-    ///
-    /// One smoltcp socket is kept per backlog slot: additional sockets are created until
-    /// `max(current handle count, backlog)` handles exist, and each one that is not yet
-    /// listening is put into the listening state. Calling this on a socket that is
-    /// already listening is a no-op.
-    ///
-    /// @param backlog Maximum length of the queue of not-yet-accepted connections
-    ///
-    /// @return `EINVAL` if the socket has no local endpoint or smoltcp refuses to listen
-    fn listen(&mut self, backlog: usize) -> Result<(), SystemError> {
-        if self.is_listening {
-            return Ok(());
-        }
-
-        // debug!(
-        //     "tcp socket:listen, socket'len={}, backlog = {backlog}",
-        //     self.handles.len()
-        // );
-
-        let local_endpoint = self.local_endpoint.ok_or(SystemError::EINVAL)?;
-        let mut sockets = SOCKET_SET.lock_irqsave();
-        // Number of handles that already exist.
-        let handlen = self.handles.len();
-        let backlog = handlen.max(backlog);
-
-        // Create the remaining sockets that still need to be built.
-        // debug!("tcp socket:before listen, socket'len={}", self.handle_list.len());
-        let mut handle_guard = HANDLE_MAP.write_irqsave();
-        let socket_handle_item_0 = handle_guard.get_mut(&self.socket_handle()).unwrap();
-        socket_handle_item_0.is_posix_listen = true;
-
-        self.handles.extend((handlen..backlog).map(|_| {
-            let socket = Self::create_new_socket();
-            let handle = GlobalSocketHandle::new_smoltcp_handle(sockets.add(socket));
-            let mut handle_item = SocketHandleItem::new(Arc::downgrade(&self.posix_item));
-            handle_item.is_posix_listen = true;
-            handle_guard.insert(handle, handle_item);
-            handle
-        }));
-
-        // debug!("tcp socket:listen, socket'len={}", self.handles.len());
-        // debug!("tcp socket:listen, backlog={backlog}");
-
-        // Put every socket into the listening state.
-        for i in 0..backlog {
-            let handle = self.handles.get(i).unwrap();
-
-            let socket = sockets.get_mut::<tcp::Socket>(handle.smoltcp_handle().unwrap());
-
-            if !socket.is_listening() {
-                // debug!("Tcp Socket is already listening on {local_endpoint}");
-                self.do_listen(socket, local_endpoint)?;
-            }
-            // debug!("Tcp Socket  before listen, open={}", socket.is_open());
-        }
-
-        return Ok(());
-    }
-
-    /// Binds the socket to a local IP endpoint.
-    ///
-    /// A port of `0` is replaced by an ephemeral port from `PORT_MANAGER`. The chosen
-    /// port is then reserved, the endpoint is stored as `local_endpoint` and
-    /// `is_listening` is cleared.
-    ///
-    /// # Errors
-    /// `EINVAL` if `endpoint` is not `Endpoint::Ip(Some(_))`; otherwise whatever
-    /// `PORT_MANAGER` reports while picking or reserving the port.
-    fn bind(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        let mut local = match endpoint {
-            Endpoint::Ip(Some(ip)) => ip,
-            _ => return Err(SystemError::EINVAL),
-        };
-
-        if local.port == 0 {
-            local.port = PORT_MANAGER.get_ephemeral_port(self.metadata.socket_type)?;
-        }
-
-        // Fails if the port is already taken.
-        PORT_MANAGER.bind_port(self.metadata.socket_type, local.port)?;
-
-        self.local_endpoint = Some(local);
-        self.is_listening = false;
-        Ok(())
-    }
-
-    /// Records `shutdown_type` for this socket in `HANDLE_MAP`, replacing the stored value.
-    ///
-    /// # Panics
-    /// Panics if this socket has no entry in `HANDLE_MAP`.
-    fn shutdown(&mut self, shutdown_type: super::ShutdownType) -> Result<(), SystemError> {
-        // TODO: this is currently only recorded locally and the peer is not informed;
-        // it should later be implemented through TCP itself.
-        HANDLE_MAP
-            .write_irqsave()
-            .get_mut(&self.socket_handle())
-            .unwrap()
-            .shutdown_type = RwLock::new(shutdown_type);
-        return Ok(());
-    }
-
-    /// Waits for an incoming connection and returns it as a new `TcpSocket` together
-    /// with the peer's endpoint.
-    ///
-    /// The smoltcp socket that became active is handed over to the returned socket; its
-    /// slot in `self.handles` is refilled with a fresh socket that is put back into the
-    /// listening state. When no socket is active the caller sleeps on `CAN_ACCPET`.
-    ///
-    /// # Errors
-    /// * `EINVAL` - the socket is not listening, has no local endpoint, or re-listening fails.
-    /// * `ENOTCONN` - the active socket has no remote endpoint.
-    ///
-    /// # Panics
-    /// Panics if a handle is not a smoltcp handle, if the accepted handle has no entry in
-    /// `HANDLE_MAP`, or if that entry is not marked `is_posix_listen`.
-    fn accept(&mut self) -> Result<(Box<dyn Socket>, Endpoint), SystemError> {
-        if !self.is_listening {
-            return Err(SystemError::EINVAL);
-        }
-        let endpoint = self.local_endpoint.ok_or(SystemError::EINVAL)?;
-        loop {
-            // debug!("tcp accept: poll_ifaces()");
-            poll_ifaces();
-            // debug!("tcp socket:accept, socket'len={}", self.handle_list.len());
-
-            let mut sockset = SOCKET_SET.lock_irqsave();
-            // Get the corresponding activated handler
-            let global_handle_index = self.handles.iter().position(|handle| {
-                let con_smol_sock = sockset.get::<tcp::Socket>(handle.smoltcp_handle().unwrap());
-                con_smol_sock.is_active()
-            });
-
-            if let Some(handle_index) = global_handle_index {
-                let con_smol_sock = sockset
-                    .get::<tcp::Socket>(self.handles[handle_index].smoltcp_handle().unwrap());
-
-                // debug!("[Socket] [TCP] Accept: {:?}", handle);
-                // handle is connected socket's handle
-                let remote_ep = con_smol_sock
-                    .remote_endpoint()
-                    .ok_or(SystemError::ENOTCONN)?;
-
-                let tcp_socket = Self::create_new_socket();
-
-                let new_handle = GlobalSocketHandle::new_smoltcp_handle(sockset.add(tcp_socket));
-
-                // let handle in TcpSock be the new empty handle, and return the old connected handle
-                let old_handle = core::mem::replace(&mut self.handles[handle_index], new_handle);
-
-                // Buffer sizes are passed in the same order as in `TcpSocket::new`
-                // (RX first, then TX); they were previously swapped here.
-                let metadata = SocketMetadata::new(
-                    SocketType::Tcp,
-                    Self::DEFAULT_RX_BUF_SIZE,
-                    Self::DEFAULT_TX_BUF_SIZE,
-                    Self::DEFAULT_METADATA_BUF_SIZE,
-                    self.metadata.options,
-                );
-
-                let sock_ret = Box::new(TcpSocket {
-                    handles: vec![old_handle],
-                    local_endpoint: self.local_endpoint,
-                    is_listening: false,
-                    metadata,
-                    posix_item: Arc::new(PosixSocketHandleItem::new(None)),
-                });
-
-                {
-                    let mut handle_guard = HANDLE_MAP.write_irqsave();
-                    // Remove the original item first.
-                    let item = handle_guard.remove(&old_handle).unwrap();
-                    item.reset_shutdown_type();
-                    assert!(item.is_posix_listen);
-
-                    // The connected (old) handle gets a new item that belongs to the
-                    // returned socket, matching smoltcp's behaviour.
-                    let new_item = SocketHandleItem::new(Arc::downgrade(&sock_ret.posix_item));
-                    handle_guard.insert(old_handle, new_item);
-                    // The listener's original item now tracks the replacement handle.
-                    handle_guard.insert(new_handle, item);
-
-                    let socket = sockset.get_mut::<tcp::Socket>(
-                        self.handles[handle_index].smoltcp_handle().unwrap(),
-                    );
-
-                    // Put the replacement socket back into the listening state.
-                    if !socket.is_listening() {
-                        self.do_listen(socket, endpoint)?;
-                    }
-
-                    drop(handle_guard);
-                }
-
-                return Ok((sock_ret, Endpoint::Ip(Some(remote_ep))));
-            }
-
-            // Nothing to accept yet: release the socket set before sleeping.
-            drop(sockset);
-
-            // debug!("[TCP] [Accept] sleeping socket with handle: {:?}", self.handles.first().unwrap().smoltcp_handle().unwrap());
-            self.posix_item.sleep(Self::CAN_ACCPET);
-            // debug!("tcp socket:after sleep, handle_guard'len={}",HANDLE_MAP.write_irqsave().len());
-        }
-    }
-
-    /// Returns the local endpoint of this socket, if it has one.
-    ///
-    /// The endpoint stored by `bind` takes precedence; otherwise the local endpoint
-    /// reported by the first smoltcp socket is used.
-    fn endpoint(&self) -> Option<Endpoint> {
-        if let Some(bound) = self.local_endpoint {
-            return Some(Endpoint::Ip(Some(bound)));
-        }
-
-        let sockets = SOCKET_SET.lock_irqsave();
-        let socket =
-            sockets.get::<tcp::Socket>(self.handles.first().unwrap().smoltcp_handle().unwrap());
-        let from_stack = socket.local_endpoint().map(|ep| Endpoint::Ip(Some(ep)));
-        from_stack
-    }
-
-    /// Returns the remote endpoint reported by the first smoltcp socket, if any.
-    fn peer_endpoint(&self) -> Option<Endpoint> {
-        let sockets = SOCKET_SET.lock_irqsave();
-        let first_handle = self.handles.first().unwrap().smoltcp_handle().unwrap();
-
-        match sockets.get::<tcp::Socket>(first_handle).remote_endpoint() {
-            Some(remote) => Some(Endpoint::Ip(Some(remote))),
-            None => None,
-        }
-    }
-
-    /// Returns a clone of this socket's metadata.
-    fn metadata(&self) -> SocketMetadata {
-        self.metadata.clone()
-    }
-
-    /// Returns a boxed clone of this socket as a `dyn Socket` trait object.
-    fn box_clone(&self) -> Box<dyn Socket> {
-        Box::new(self.clone())
-    }
-
-    /// Returns the first handle of this socket.
-    ///
-    /// # Panics
-    /// Panics if `handles` is empty.
-    fn socket_handle(&self) -> GlobalSocketHandle {
-        // debug!("tcp socket:socket_handle, socket'len={}",self.handle.len());
-
-        *self.handles.first().unwrap()
-    }
-
-    /// Exposes `self` as `&dyn Any`.
-    fn as_any_ref(&self) -> &dyn core::any::Any {
-        self
-    }
-
-    /// Exposes `self` as `&mut dyn Any`.
-    fn as_any_mut(&mut self) -> &mut dyn core::any::Any {
-        self
-    }
-}

+ 148 - 0
kernel/src/net/socket/inet/common/mod.rs

@@ -0,0 +1,148 @@
+use crate::net::{Iface, NET_DEVICES};
+use alloc::sync::Arc;
+use system_error::SystemError::{self, *};
+
+pub mod port;
+pub use port::PortManager;
+
+/// Kinds of inet sockets distinguished by this module.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub enum Types {
+    Raw,
+    Icmp,
+    Udp,
+    Tcp,
+    Dhcpv4,
+    Dns,
+}
+
+/// A smoltcp socket that has been placed into the socket set of one network interface.
+///
+/// For now this design does not address listening on multiple NICs; it only covers a
+/// socket bound to a single interface.
+#[derive(Debug)]
+pub struct BoundInner {
+    /// Handle of the socket inside `iface`'s socket set.
+    handle: smoltcp::iface::SocketHandle,
+    /// Interface whose socket set holds the socket.
+    iface: Arc<dyn Iface>,
+    // inner: Vec<(smoltcp::iface::SocketHandle, Arc<dyn Iface>)>
+    // address: smoltcp::wire::IpAddress,
+}
+
+impl BoundInner {
+    /// # `bind`
+    /// Binds `socket` to `address` by adding it to the socket set of a network interface.
+    ///
+    /// An unspecified address selects the device registered under key `0` in
+    /// `NET_DEVICES`; any other address selects the interface that owns that address.
+    ///
+    /// # Errors
+    /// `ENODEV` when no suitable interface exists. This now also covers a missing
+    /// device `0`, which previously panicked.
+    pub fn bind<T>(
+        socket: T,
+        // socket_type: Types,
+        address: &smoltcp::wire::IpAddress,
+    ) -> Result<Self, SystemError>
+    where
+        T: smoltcp::socket::AnySocket<'static>,
+    {
+        let iface = if address.is_unspecified() {
+            // No per-interface fan-out yet: an unspecified address is pinned to device 0.
+            log::debug!("Not bind to any iface, bind to virtIO");
+            let default_device = NET_DEVICES.read_irqsave().get(&0).cloned();
+            default_device.ok_or(ENODEV)?
+        } else {
+            get_iface_to_bind(address).ok_or(ENODEV)?
+        };
+
+        let handle = iface.sockets().lock_no_preempt().add(socket);
+        Ok(Self { handle, iface })
+    }
+
+    /// Adds `socket` to the interface chosen by `get_ephemeral_iface` for reaching `remote`.
+    ///
+    /// Returns the bound socket together with the address selected on that interface.
+    /// No port is reserved here.
+    ///
+    /// # Panics
+    /// Panics if `get_ephemeral_iface` finds no usable network interface.
+    pub fn bind_ephemeral<T>(
+        socket: T,
+        // socket_type: Types,
+        remote: smoltcp::wire::IpAddress,
+    ) -> Result<(Self, smoltcp::wire::IpAddress), SystemError>
+    where
+        T: smoltcp::socket::AnySocket<'static>,
+    {
+        let (iface, address) = get_ephemeral_iface(&remote);
+        let handle = iface.sockets().lock_no_preempt().add(socket);
+        Ok((Self { handle, iface }, address))
+    }
+
+    /// Returns the port manager of the interface this socket lives on.
+    pub fn port_manager(&self) -> &PortManager {
+        self.iface.port_manager()
+    }
+
+    /// Runs `f` with mutable access to the underlying socket while the interface's
+    /// socket set is locked.
+    pub fn with_mut<T: smoltcp::socket::AnySocket<'static>, R, F: FnMut(&mut T) -> R>(
+        &self,
+        mut f: F,
+    ) -> R {
+        f(self.iface.sockets().lock().get_mut::<T>(self.handle))
+    }
+
+    /// Runs `f` with shared access to the underlying socket while the interface's
+    /// socket set is locked.
+    pub fn with<T: smoltcp::socket::AnySocket<'static>, R, F: Fn(&T) -> R>(&self, f: F) -> R {
+        f(self.iface.sockets().lock().get::<T>(self.handle))
+    }
+
+    /// Returns the interface this socket lives on.
+    pub fn iface(&self) -> &Arc<dyn Iface> {
+        &self.iface
+    }
+
+    /// Removes the socket from the interface's socket set.
+    pub fn release(&self) {
+        self.iface.sockets().lock().remove(self.handle);
+    }
+}
+
+/// Finds the registered interface that owns `ip_addr`.
+///
+/// Returns the first interface in `NET_DEVICES` whose smoltcp interface reports the
+/// address via `has_ip_addr`, or `None` when no interface has it.
+#[inline]
+pub fn get_iface_to_bind(ip_addr: &smoltcp::wire::IpAddress) -> Option<Arc<dyn Iface>> {
+    let devices = crate::net::NET_DEVICES.read_irqsave();
+    for (_, iface) in devices.iter() {
+        let owns_address = iface.smol_iface().lock().has_ip_addr(*ip_addr);
+        if owns_address {
+            return Some(iface.clone());
+        }
+    }
+    None
+}
+
+/// Get a suitable iface to deal with sendto/connect request if the socket is not bound to an iface.
+///
+/// Candidates are tried in this order, each one only if the previous found nothing:
+/// 1. an interface that owns `remote_ip_addr` itself (paired with that address);
+/// 2. an interface with a CIDR containing `remote_ip_addr` (paired with the CIDR's address);
+/// 3. the first interface that has at least one address (paired with its first address).
+///
+/// The fallbacks are built lazily with `or_else`, so no extra lock is taken and no extra
+/// scan is done once a candidate has been found. Interfaces without any address are
+/// skipped in step 3 instead of causing an out-of-bounds panic.
+///
+/// # Panics
+/// Panics with "No network interface" when no candidate exists at all.
+fn get_ephemeral_iface(
+    remote_ip_addr: &smoltcp::wire::IpAddress,
+) -> (Arc<dyn Iface>, smoltcp::wire::IpAddress) {
+    get_iface_to_bind(remote_ip_addr)
+        .map(|iface| (iface, *remote_ip_addr))
+        .or_else(|| {
+            let ifaces = NET_DEVICES.read_irqsave();
+            ifaces.iter().find_map(|(_, iface)| {
+                iface
+                    .smol_iface()
+                    .lock()
+                    .ip_addrs()
+                    .iter()
+                    .find(|cidr| cidr.contains_addr(remote_ip_addr))
+                    .map(|cidr| (iface.clone(), cidr.address()))
+            })
+        })
+        .or_else(|| {
+            NET_DEVICES.read_irqsave().values().find_map(|iface| {
+                // Copy the address out so the interface lock is released before cloning.
+                let first_addr = iface
+                    .smol_iface()
+                    .lock()
+                    .ip_addrs()
+                    .first()
+                    .map(|cidr| cidr.address());
+                first_addr.map(|addr| (iface.clone(), addr))
+            })
+        })
+        .expect("No network interface")
+}

+ 114 - 0
kernel/src/net/socket/inet/common/port.rs

@@ -0,0 +1,114 @@
+use hashbrown::HashMap;
+use system_error::SystemError;
+
+use crate::{
+    arch::rand::rand,
+    libs::spinlock::SpinLock,
+    process::{Pid, ProcessManager},
+};
+
+use super::Types::{self, *};
+
+/// # Port manager for TCP and UDP.
+/// When a TCP/UDP socket binds a port, the port is recorded in the matching table so
+/// that port conflicts can be detected.
+#[derive(Debug)]
+pub struct PortManager {
+    // Table of bound TCP ports and the pid that bound each one
+    tcp_port_table: SpinLock<HashMap<u16, Pid>>,
+    // Table of bound UDP ports and the pid that bound each one
+    udp_port_table: SpinLock<HashMap<u16, Pid>>,
+}
+
+impl PortManager {
+    /// Creates a port manager with empty TCP and UDP tables.
+    pub fn new() -> Self {
+        Self {
+            tcp_port_table: SpinLock::new(HashMap::new()),
+            udp_port_table: SpinLock::new(HashMap::new()),
+        }
+    }
+
+    /// Returns the port table for `socket_type`, or `None` for types that have no ports.
+    fn port_table(&self, socket_type: Types) -> Option<&SpinLock<HashMap<u16, Pid>>> {
+        match socket_type {
+            Udp => Some(&self.udp_port_table),
+            Tcp => Some(&self.tcp_port_table),
+            _ => None,
+        }
+    }
+
+    /// @brief Picks a port in 49152..=65535 that is not recorded for the given protocol.
+    ///
+    /// The search continues from a shared rotating counter that is seeded randomly on
+    /// first use. The counter is an atomic, so concurrent callers no longer race on a
+    /// `static mut`. The returned port is NOT reserved; use `bind_port` or
+    /// `bind_ephemeral_port` for that.
+    ///
+    /// @return `EADDRINUSE` if every dynamic port is already taken
+    ///
+    /// # Panics
+    /// Panics when `socket_type` is neither `Udp` nor `Tcp`.
+    pub fn get_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
+        use core::sync::atomic::{AtomicU16, Ordering};
+
+        // TODO: selects non-conflict high port
+
+        const FIRST_DYNAMIC_PORT: u16 = 49152;
+        const LAST_DYNAMIC_PORT: u16 = 65535;
+
+        /// Next value of the rotating counter; wraps from the last dynamic port to the first.
+        fn advance(current: u16) -> u16 {
+            if current == LAST_DYNAMIC_PORT {
+                FIRST_DYNAMIC_PORT
+            } else {
+                current + 1
+            }
+        }
+
+        // 0 means "not seeded yet"; every seeded value lies inside the dynamic range.
+        static EPHEMERAL_PORT: AtomicU16 = AtomicU16::new(0);
+
+        let table = match self.port_table(socket_type) {
+            Some(table) => table,
+            None => panic!("{:?} cann't get a port", socket_type),
+        };
+
+        if EPHEMERAL_PORT.load(Ordering::Relaxed) == 0 {
+            let seed = (49152 + rand() % (65536 - 49152)) as u16;
+            // Losing this race is fine: another caller has already seeded the counter.
+            let _ = EPHEMERAL_PORT.compare_exchange(0, seed, Ordering::Relaxed, Ordering::Relaxed);
+        }
+
+        // Try each port of the dynamic range at most once.
+        for _ in 0..(65536 - 49152) {
+            let previous = EPHEMERAL_PORT
+                .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
+                    Some(advance(current))
+                })
+                .unwrap_or_else(|current| current);
+            let port = advance(previous);
+
+            // Check the protocol's table to see whether the port is occupied.
+            if !table.lock().contains_key(&port) {
+                return Ok(port);
+            }
+        }
+        Err(SystemError::EADDRINUSE)
+    }
+
+    /// Picks an ephemeral port and reserves it for the current process.
+    ///
+    /// The pick and the reservation are two separate steps, so `EADDRINUSE` can still
+    /// be returned if another caller takes the port in between.
+    #[inline]
+    pub fn bind_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
+        let port = self.get_ephemeral_port(socket_type)?;
+        self.bind_port(socket_type, port)?;
+        Ok(port)
+    }
+
+    /// @brief Checks whether `port` is taken and, if not, records it in the TCP/UDP table
+    /// together with the current pid.
+    ///
+    /// Port `0` and socket types other than `Udp`/`Tcp` are accepted without recording anything.
+    ///
+    /// @return `EADDRINUSE` if the port is already recorded
+    ///
+    /// TODO: add support for port reuse
+    pub fn bind_port(&self, socket_type: Types, port: u16) -> Result<(), SystemError> {
+        if port == 0 {
+            return Ok(());
+        }
+        let table = match self.port_table(socket_type) {
+            Some(table) => table,
+            None => return Ok(()),
+        };
+
+        let mut guard = table.lock();
+        if guard.contains_key(&port) {
+            return Err(SystemError::EADDRINUSE);
+        }
+        guard.insert(port, ProcessManager::current_pid());
+        Ok(())
+    }
+
+    /// @brief Removes `port` from the table of the given protocol.
+    /// should call this function when socket is closed or aborted
+    pub fn unbind_port(&self, socket_type: Types, port: u16) {
+        if let Some(table) = self.port_table(socket_type) {
+            table.lock().remove(&port);
+        }
+    }
+}

+ 156 - 0
kernel/src/net/socket/inet/datagram/inner.rs

@@ -0,0 +1,156 @@
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+use crate::{
+    libs::spinlock::SpinLock,
+    net::socket::inet::common::{BoundInner, Types as InetTypes},
+};
+
+/// smoltcp UDP socket with `'static` buffers, as used throughout this module.
+pub type SmolUdpSocket = smoltcp::socket::udp::Socket<'static>;
+
+/// Number of `PacketMetadata` slots allocated for each of the RX and TX packet buffers.
+pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
+/// Length of the RX payload storage (64 * 1024 elements).
+pub const DEFAULT_RX_BUF_SIZE: usize = 64 * 1024;
+/// Length of the TX payload storage (64 * 1024 elements).
+pub const DEFAULT_TX_BUF_SIZE: usize = 64 * 1024;
+
+/// A UDP socket that has not been attached to any network interface yet.
+#[derive(Debug)]
+pub struct UnboundUdp {
+    socket: SmolUdpSocket,
+}
+
+impl UnboundUdp {
+    /// Creates an unbound UDP socket with freshly allocated RX and TX packet buffers.
+    pub fn new() -> Self {
+        let rx_buffer = smoltcp::socket::udp::PacketBuffer::new(
+            vec![smoltcp::socket::udp::PacketMetadata::EMPTY; DEFAULT_METADATA_BUF_SIZE],
+            vec![0; DEFAULT_RX_BUF_SIZE],
+        );
+        let tx_buffer = smoltcp::socket::udp::PacketBuffer::new(
+            vec![smoltcp::socket::udp::PacketMetadata::EMPTY; DEFAULT_METADATA_BUF_SIZE],
+            vec![0; DEFAULT_TX_BUF_SIZE],
+        );
+        let socket = SmolUdpSocket::new(rx_buffer, tx_buffer);
+
+        Self { socket }
+    }
+
+    /// Binds the socket to `local_endpoint` and reserves its port on the chosen interface.
+    ///
+    /// # Errors
+    /// * `EINVAL` - smoltcp rejects the endpoint.
+    /// * Any error from `BoundInner::bind` (no matching interface).
+    /// * Any error from the port manager (e.g. the port is already in use). In that
+    ///   case the socket is removed from the interface again instead of being leaked.
+    pub fn bind(
+        mut self,
+        local_endpoint: smoltcp::wire::IpEndpoint,
+    ) -> Result<BoundUdp, SystemError> {
+        if self.socket.bind(local_endpoint).is_err() {
+            return Err(EINVAL);
+        }
+        let inner = BoundInner::bind(self.socket, &local_endpoint.addr)?;
+        if let Err(e) = inner
+            .port_manager()
+            .bind_port(InetTypes::Udp, local_endpoint.port)
+        {
+            // The socket is already in the interface's socket set; take it out again so
+            // a failed bind leaves nothing behind.
+            inner.release();
+            return Err(e);
+        }
+        Ok(BoundUdp {
+            inner,
+            remote: SpinLock::new(None),
+        })
+    }
+
+    /// Attaches the socket to an interface suitable for reaching `remote` and reserves
+    /// an ephemeral port on it.
+    ///
+    /// If the port reservation fails, the socket is removed from the interface again.
+    ///
+    /// NOTE(review): the endpoint stored in `remote` is built from the address picked on
+    /// the local interface and the locally reserved port, and the smoltcp socket itself
+    /// is not bound to that port here — confirm this is intended.
+    pub fn bind_ephemeral(self, remote: smoltcp::wire::IpAddress) -> Result<BoundUdp, SystemError> {
+        let (inner, address) = BoundInner::bind_ephemeral(self.socket, remote)?;
+        let bound_port = match inner.port_manager().bind_ephemeral_port(InetTypes::Udp) {
+            Ok(port) => port,
+            Err(e) => {
+                inner.release();
+                return Err(e);
+            }
+        };
+        let endpoint = smoltcp::wire::IpEndpoint::new(address, bound_port);
+        Ok(BoundUdp {
+            inner,
+            remote: SpinLock::new(Some(endpoint)),
+        })
+    }
+
+    /// Closes the underlying smoltcp socket.
+    pub fn close(&mut self) {
+        self.socket.close();
+    }
+}
+
+/// A UDP socket that lives in an interface's socket set, plus its default peer.
+#[derive(Debug)]
+pub struct BoundUdp {
+    /// The socket as registered on its interface.
+    inner: BoundInner,
+    /// Default destination used by `try_send` when the caller supplies none.
+    remote: SpinLock<Option<smoltcp::wire::IpEndpoint>>,
+}
+
+impl BoundUdp {
+    /// Runs `f` with mutable access to the smoltcp UDP socket.
+    pub fn with_mut_socket<F, T>(&self, f: F) -> T
+    where
+        F: FnMut(&mut SmolUdpSocket) -> T,
+    {
+        self.inner.with_mut(f)
+    }
+
+    /// Runs `f` with shared access to the smoltcp UDP socket.
+    pub fn with_socket<F, T>(&self, f: F) -> T
+    where
+        F: Fn(&SmolUdpSocket) -> T,
+    {
+        self.inner.with(f)
+    }
+
+    /// Returns the endpoint the smoltcp socket reports as its own.
+    pub fn endpoint(&self) -> smoltcp::wire::IpListenEndpoint {
+        self.with_socket(|socket| socket.endpoint())
+    }
+
+    /// Stores `remote` as the default destination, replacing any previous one.
+    pub fn connect(&self, remote: smoltcp::wire::IpEndpoint) {
+        *self.remote.lock() = Some(remote);
+    }
+
+    /// Receives one datagram into `buf` without blocking.
+    ///
+    /// Returns the number of bytes copied and the sender's endpoint, or
+    /// `EAGAIN_OR_EWOULDBLOCK` when nothing can be received.
+    #[inline]
+    pub fn try_recv(
+        &self,
+        buf: &mut [u8],
+    ) -> Result<(usize, smoltcp::wire::IpEndpoint), SystemError> {
+        self.with_mut_socket(|socket| {
+            if !socket.can_recv() {
+                return Err(EAGAIN_OR_EWOULDBLOCK);
+            }
+            match socket.recv_slice(buf) {
+                Ok((size, metadata)) => Ok((size, metadata.endpoint)),
+                Err(_) => Err(EAGAIN_OR_EWOULDBLOCK),
+            }
+        })
+    }
+
+    /// Tells whether the socket reports a datagram ready to be received.
+    #[inline]
+    pub fn can_recv(&self) -> bool {
+        self.with_socket(SmolUdpSocket::can_recv)
+    }
+
+    /// Queues `buf` as one datagram to `to`, or to the stored default peer when `to` is `None`.
+    ///
+    /// # Errors
+    /// * `ENOTCONN` - no destination was given and none is stored.
+    /// * `ENOBUFS` - the socket cannot send right now or smoltcp rejected the datagram.
+    pub fn try_send(
+        &self,
+        buf: &[u8],
+        to: Option<smoltcp::wire::IpEndpoint>,
+    ) -> Result<usize, SystemError> {
+        let remote = to.or(*self.remote.lock()).ok_or(ENOTCONN)?;
+
+        self.with_mut_socket(|socket| {
+            if !socket.can_send() {
+                return Err(ENOBUFS);
+            }
+            if socket.send_slice(buf, remote).is_err() {
+                return Err(ENOBUFS);
+            }
+            log::debug!("send {} bytes", buf.len());
+            Ok(buf.len())
+        })
+    }
+
+    /// Returns the interface-bound part of this socket.
+    pub fn inner(&self) -> &BoundInner {
+        &self.inner
+    }
+
+    /// Releases the socket's port in the interface's port manager, then closes the socket.
+    pub fn close(&self) {
+        let local_port = self.endpoint().port;
+        self.inner
+            .iface()
+            .port_manager()
+            .unbind_port(InetTypes::Udp, local_port);
+        self.with_mut_socket(|socket| socket.close());
+    }
+}
+
+// `UdpInner` is responsible for managing the socket's internal resources.
+// It is either a socket not yet attached to an interface (`Unbound`) or one that is (`Bound`).
+#[derive(Debug)]
+pub enum UdpInner {
+    Unbound(UnboundUdp),
+    Bound(BoundUdp),
+}

+ 453 - 0
kernel/src/net/socket/inet/datagram/mod.rs

@@ -0,0 +1,453 @@
+use inet::InetSocket;
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+use crate::filesystem::vfs::IndexNode;
+use crate::libs::rwlock::RwLock;
+use crate::libs::spinlock::SpinLock;
+use crate::net::event_poll::EPollEventType;
+use crate::net::net_core::poll_ifaces;
+use crate::net::socket::*;
+use alloc::sync::{Arc, Weak};
+use core::sync::atomic::AtomicBool;
+
+pub mod inner;
+
+use inner::*;
+
+type EP = EPollEventType;
+
+// `UdpSocket` provides the state-transition interface and performs the state transitions.
+#[derive(Debug)]
+pub struct UdpSocket {
+    // Current state; `None` once the state has been taken out (e.g. by `close()`).
+    inner: RwLock<Option<UdpInner>>,
+    // Non-blocking flag, read through `is_nonblock()`.
+    nonblock: AtomicBool,
+    // Wait queue that blocking receive calls sleep on.
+    wait_queue: WaitQueue,
+    // Weak self-reference; upgraded in `do_bind` to register this socket with the interface.
+    self_ref: Weak<UdpSocket>,
+}
+
+impl UdpSocket {
+    /// Creates an unbound UDP socket wrapped in an `Arc`, storing a weak
+    /// reference to itself in `self_ref`.
+    pub fn new(nonblock: bool) -> Arc<Self> {
+        Arc::new_cyclic(|weak_self| {
+            let initial_state = UdpInner::Unbound(UnboundUdp::new());
+            Self {
+                inner: RwLock::new(Some(initial_state)),
+                nonblock: AtomicBool::new(nonblock),
+                wait_queue: WaitQueue::default(),
+                self_ref: weak_self.clone(),
+            }
+        })
+    }
+
+    /// Returns the current value of the non-blocking flag.
+    pub fn is_nonblock(&self) -> bool {
+        use core::sync::atomic::Ordering;
+
+        self.nonblock.load(Ordering::Relaxed)
+    }
+
+    /// Binds this socket to `local_endpoint` and registers it with the
+    /// interface it was bound on.
+    ///
+    /// The socket state is always left populated: a failed bind leaves the
+    /// socket unbound, and binding an already-bound socket leaves it untouched.
+    ///
+    /// # Errors
+    /// * `EINVAL` if the socket is not currently unbound.
+    /// * Any error returned by `UnboundUdp::bind`.
+    pub fn do_bind(&self, local_endpoint: smoltcp::wire::IpEndpoint) -> Result<(), SystemError> {
+        let mut inner = self.inner.write();
+        match inner.take() {
+            Some(UdpInner::Unbound(unbound)) => match unbound.bind(local_endpoint) {
+                Ok(bound) => {
+                    bound
+                        .inner()
+                        .iface()
+                        .common()
+                        .bind_socket(self.self_ref.upgrade().unwrap());
+                    *inner = Some(UdpInner::Bound(bound));
+                    Ok(())
+                }
+                Err(err) => {
+                    // The unbound socket was handed to `bind`, so put a fresh
+                    // one back instead of leaving the state empty (an empty
+                    // state makes later `expect`/`unwrap` calls panic).
+                    *inner = Some(UdpInner::Unbound(UnboundUdp::new()));
+                    Err(err)
+                }
+            },
+            other => {
+                // Already bound (or closed): restore exactly what was there.
+                *inner = other;
+                Err(EINVAL)
+            }
+        }
+    }
+
+    /// Ensures the socket is bound, binding it to an ephemeral local endpoint
+    /// chosen for reaching `remote` if it is still unbound.
+    ///
+    /// An already-bound socket is left as it is. If the ephemeral bind fails
+    /// the socket is left unbound rather than with an empty state.
+    ///
+    /// # Errors
+    /// Any error returned by `UnboundUdp::bind_ephemeral`.
+    ///
+    /// # Panics
+    /// If the socket state is empty (e.g. after `close()`).
+    pub fn bind_emphemeral(&self, remote: smoltcp::wire::IpAddress) -> Result<(), SystemError> {
+        let mut inner_guard = self.inner.write();
+        let bound = match inner_guard.take().expect("Udp inner is None") {
+            UdpInner::Bound(inner) => inner,
+            UdpInner::Unbound(inner) => match inner.bind_ephemeral(remote) {
+                Ok(bound) => bound,
+                Err(err) => {
+                    // The unbound socket was handed to `bind_ephemeral`; put a
+                    // fresh one back so later calls do not hit an empty state.
+                    inner_guard.replace(UdpInner::Unbound(UnboundUdp::new()));
+                    return Err(err);
+                }
+            },
+        };
+        inner_guard.replace(UdpInner::Bound(bound));
+        Ok(())
+    }
+
+    /// Returns `true` only while the socket is in the bound state.
+    pub fn is_bound(&self) -> bool {
+        let state = self.inner.read();
+        matches!(&*state, Some(UdpInner::Bound(_)))
+    }
+
+    /// Closes a bound socket and clears the stored state.
+    ///
+    /// Does nothing when the socket is unbound or already closed.
+    pub fn close(&self) {
+        let mut state = self.inner.write();
+        match &*state {
+            Some(UdpInner::Bound(bound)) => bound.close(),
+            _ => return,
+        }
+        *state = None;
+    }
+
+    /// Polls the interfaces, then makes one non-blocking attempt to receive a
+    /// datagram into `buf`.
+    ///
+    /// # Errors
+    /// * `ENOTCONN` if the socket is not bound.
+    /// * Whatever `BoundUdp::try_recv` returns otherwise.
+    ///
+    /// # Panics
+    /// If the socket state is empty.
+    pub fn try_recv(
+        &self,
+        buf: &mut [u8],
+    ) -> Result<(usize, smoltcp::wire::IpEndpoint), SystemError> {
+        poll_ifaces();
+        let state = self.inner.read();
+        let outcome = match state.as_ref().expect("Udp Inner is None") {
+            UdpInner::Bound(bound) => bound.try_recv(buf),
+            UdpInner::Unbound(_) => Err(ENOTCONN),
+        };
+        outcome
+    }
+
+    /// Returns whether `on_events()` currently reports `EPOLLIN`.
+    #[inline]
+    pub fn can_recv(&self) -> bool {
+        self.on_events().contains(EP::EPOLLIN)
+    }
+
+    /// Returns whether `on_events()` currently reports `EPOLLOUT`.
+    #[inline]
+    pub fn can_send(&self) -> bool {
+        self.on_events().contains(EP::EPOLLOUT)
+    }
+
+    /// Makes one non-blocking attempt to send `buf`, binding the socket to an
+    /// ephemeral local endpoint first if it is still unbound.
+    ///
+    /// `to` is the destination; when `None`, the remote stored by `connect`
+    /// is used by the bound socket.
+    ///
+    /// The socket state is restored on every error path, so a failed send
+    /// never leaves the socket with an empty state.
+    ///
+    /// # Errors
+    /// * `EADDRNOTAVAIL` if the socket is unbound and `to` is `None`.
+    /// * Any error from `UnboundUdp::bind_ephemeral` or `BoundUdp::try_send`.
+    ///
+    /// # Panics
+    /// If the socket state is empty (e.g. after `close()`).
+    pub fn try_send(
+        &self,
+        buf: &[u8],
+        to: Option<smoltcp::wire::IpEndpoint>,
+    ) -> Result<usize, SystemError> {
+        {
+            let mut inner_guard = self.inner.write();
+            let inner = match inner_guard.take().expect("Udp Inner is None") {
+                UdpInner::Bound(bound) => bound,
+                UdpInner::Unbound(unbound) => {
+                    let remote = match to {
+                        Some(remote) => remote,
+                        None => {
+                            // Nothing was consumed yet: put the socket back untouched.
+                            inner_guard.replace(UdpInner::Unbound(unbound));
+                            return Err(EADDRNOTAVAIL);
+                        }
+                    };
+                    match unbound.bind_ephemeral(remote.addr) {
+                        Ok(bound) => bound,
+                        Err(err) => {
+                            // The unbound socket was handed to `bind_ephemeral`;
+                            // restore a fresh one so the state is never empty.
+                            inner_guard.replace(UdpInner::Unbound(UnboundUdp::new()));
+                            return Err(err);
+                        }
+                    }
+                }
+            };
+            inner_guard.replace(UdpInner::Bound(inner));
+        };
+        // Optimize: is taking the lock twice cheaper on average than holding
+        // a single read lock for a long time?
+        let result = match self.inner.read().as_ref().expect("Udp Inner is None") {
+            UdpInner::Bound(bound) => bound.try_send(buf, to),
+            _ => Err(ENOTCONN),
+        };
+        poll_ifaces();
+        result
+    }
+
+    /// Reads one datagram into `buf` and returns its length.
+    ///
+    /// Only the non-blocking path is implemented; a blocking socket hits
+    /// `todo!()`.
+    pub fn read(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        if !self.is_nonblock() {
+            // Blocking read (waiting on `wait_queue` for EPOLLIN) is not implemented yet.
+            todo!()
+        }
+        self.try_recv(buf).map(|(size, _)| size)
+    }
+
+    /// Computes the epoll event mask that currently applies to this socket.
+    ///
+    /// * Unbound: always writable (`EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND`).
+    /// * Bound: readable (`EPOLLIN | EPOLLRDNORM`) when the socket can
+    ///   receive, writable when it can send.
+    ///
+    /// # Panics
+    /// If the socket state is empty (e.g. after `close()`).
+    pub fn on_events(&self) -> EPollEventType {
+        let mut event = EPollEventType::empty();
+        match self.inner.read().as_ref().unwrap() {
+            UdpInner::Unbound(_) => {
+                event.insert(EP::EPOLLOUT | EP::EPOLLWRNORM | EP::EPOLLWRBAND);
+            }
+            UdpInner::Bound(bound) => {
+                let (can_recv, can_send) =
+                    bound.with_socket(|socket| (socket.can_recv(), socket.can_send()));
+
+                if can_recv {
+                    event.insert(EP::EPOLLIN | EP::EPOLLRDNORM);
+                }
+
+                // A full send buffer is an ordinary runtime condition, so it
+                // must not panic the kernel: simply do not report writability.
+                // TODO: notify the sender (signal handling) when buffer space
+                // is insufficient.
+                if can_send {
+                    event.insert(EP::EPOLLOUT | EP::EPOLLWRNORM | EP::EPOLLWRBAND);
+                }
+            }
+        }
+        event
+    }
+}
+
+impl Socket for UdpSocket {
+    /// Returns the wait queue owned by this socket.
+    fn wait_queue(&self) -> &WaitQueue {
+        &self.wait_queue
+    }
+
+    /// Returns the raw bits of the event mask computed by `on_events()`.
+    fn poll(&self) -> usize {
+        self.on_events().bits() as usize
+    }
+
+    /// Binds the socket to an IP endpoint via `do_bind`.
+    ///
+    /// # Errors
+    /// `EAFNOSUPPORT` for any endpoint that is not `Endpoint::Ip`.
+    fn bind(&self, local_endpoint: Endpoint) -> Result<(), SystemError> {
+        match local_endpoint {
+            Endpoint::Ip(ip_endpoint) => self.do_bind(ip_endpoint),
+            _ => Err(EAFNOSUPPORT),
+        }
+    }
+
+    /// Returns the payload send capacity of a bound socket, or
+    /// `DEFAULT_TX_BUF_SIZE` while unbound.
+    ///
+    /// # Panics
+    /// If the socket state is empty.
+    fn send_buffer_size(&self) -> usize {
+        let state = self.inner.read();
+        let capacity = if let UdpInner::Bound(bound) = state.as_ref().unwrap() {
+            bound.with_socket(|udp| udp.payload_send_capacity())
+        } else {
+            inner::DEFAULT_TX_BUF_SIZE
+        };
+        capacity
+    }
+
+    /// Returns the payload receive capacity of a bound socket, or
+    /// `DEFAULT_RX_BUF_SIZE` while unbound.
+    ///
+    /// # Panics
+    /// If the socket state is empty.
+    fn recv_buffer_size(&self) -> usize {
+        let state = self.inner.read();
+        let capacity = if let UdpInner::Bound(bound) = state.as_ref().unwrap() {
+            bound.with_socket(|udp| udp.payload_recv_capacity())
+        } else {
+            inner::DEFAULT_RX_BUF_SIZE
+        };
+        capacity
+    }
+
+    /// Sets the default remote endpoint, binding the socket to an ephemeral
+    /// local endpoint first if necessary.
+    ///
+    /// # Errors
+    /// * `EAFNOSUPPORT` for any endpoint that is not `Endpoint::Ip`.
+    /// * Any error from `bind_emphemeral`.
+    ///
+    /// # Panics
+    /// If the socket is not in the bound state after `bind_emphemeral`
+    /// succeeded.
+    fn connect(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        let remote = match endpoint {
+            Endpoint::Ip(remote) => remote,
+            _ => return Err(EAFNOSUPPORT),
+        };
+
+        self.bind_emphemeral(remote.addr)?;
+
+        let state = self.inner.read();
+        match state.as_ref().expect("UDP Inner disappear") {
+            UdpInner::Bound(bound) => bound.connect(remote),
+            UdpInner::Unbound(_) => panic!(""),
+        }
+        Ok(())
+    }
+
+    /// Sends `buffer` to the remote stored by `connect`, as a single
+    /// non-blocking attempt through `try_send`.
+    ///
+    /// `flags` is currently ignored.
+    fn send(&self, buffer: &[u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        // if flags.contains(MessageFlag::DONTWAIT) {
+
+        return self.try_send(buffer, None);
+        // } else {
+        //     // TODO: blocking send, waiting on `wait_queue` for EPOLLOUT:
+        //     // return self
+        //     //     .wait_queue
+        //     //     .busy_wait(EP::EPOLLOUT, || self.try_send(buffer, None));
+        //     todo!()
+        // }
+    }
+
+    /// Sends `buffer` to `address` as a single non-blocking attempt through
+    /// `try_send`.
+    ///
+    /// `flags` is currently ignored.
+    ///
+    /// # Errors
+    /// `EINVAL` for any address that is not `Endpoint::Ip`.
+    fn send_to(
+        &self,
+        buffer: &[u8],
+        flags: MessageFlag,
+        address: Endpoint,
+    ) -> Result<usize, SystemError> {
+        // TODO: when `MessageFlag::DONTWAIT` is not set, this should block on
+        // `wait_queue` until EPOLLOUT instead of trying only once.
+        match address {
+            Endpoint::Ip(remote) => self.try_send(buffer, Some(remote)),
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Receives one datagram into `buffer` and returns its length.
+    ///
+    /// When the socket is non-blocking or `MessageFlag::DONTWAIT` is set, a
+    /// single attempt is made. Otherwise the call waits interruptibly on the
+    /// wait queue and retries for as long as `try_recv` reports
+    /// `EAGAIN_OR_EWOULDBLOCK`.
+    fn recv(&self, buffer: &mut [u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        use crate::sched::SchedMode;
+
+        let single_attempt = self.is_nonblock() || flags.contains(MessageFlag::DONTWAIT);
+        let received = loop {
+            match self.try_recv(buffer) {
+                Err(EAGAIN_OR_EWOULDBLOCK) if !single_attempt => {
+                    wq_wait_event_interruptible!(self.wait_queue, self.can_recv(), {})?;
+                }
+                outcome => break outcome,
+            }
+        };
+        received.map(|(len, _)| len)
+    }
+
+    /// Receives one datagram into `buffer`, returning its length and the
+    /// sender's endpoint.
+    ///
+    /// If `address` is given, the socket is first connected to it. Blocking
+    /// behaves as in `recv`: one attempt when non-blocking or
+    /// `MessageFlag::DONTWAIT` is set, otherwise wait and retry on
+    /// `EAGAIN_OR_EWOULDBLOCK`.
+    fn recv_from(
+        &self,
+        buffer: &mut [u8],
+        flags: MessageFlag,
+        address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        use crate::sched::SchedMode;
+        // could block io
+        if let Some(endpoint) = address {
+            self.connect(endpoint)?;
+        }
+
+        let single_attempt = self.is_nonblock() || flags.contains(MessageFlag::DONTWAIT);
+        let received = loop {
+            match self.try_recv(buffer) {
+                Err(EAGAIN_OR_EWOULDBLOCK) if !single_attempt => {
+                    wq_wait_event_interruptible!(self.wait_queue, self.can_recv(), {})?;
+                    log::debug!("UdpSocket::recv_from: wake up");
+                }
+                outcome => break outcome,
+            }
+        };
+        received.map(|(len, remote)| (len, Endpoint::Ip(remote)))
+    }
+}
+
+impl InetSocket for UdpSocket {
+    /// Interface-event hook; the UDP socket currently takes no action here.
+    fn on_iface_events(&self) {}
+}
+
+// UDP-level socket option identifiers.
+//
+// NOTE(review): the values below are sequential option numbers, not disjoint
+// bit masks (e.g. 101 == 100 | 1, so UDP_NO_CHECK6_TX has the same bits as
+// UDP_ENCAP | UDP_CORK). Bitwise operations such as `from_bits_truncate`
+// cannot tell them apart; a plain enum may be a better fit — confirm with callers.
+bitflags! {
+    pub struct UdpSocketOptions: u32 {
+        const ZERO = 0;        /* No UDP options */
+        const UDP_CORK = 1;         /* Never send partially complete segments */
+        const UDP_ENCAP = 100;      /* Set the socket to accept encapsulated packets */
+        const UDP_NO_CHECK6_TX = 101; /* Disable sending checksum for UDP6X */
+        const UDP_NO_CHECK6_RX = 102; /* Disable accepting checksum for UDP6 */
+        const UDP_SEGMENT = 103;    /* Set GSO segmentation size */
+        const UDP_GRO = 104;        /* This socket can receive UDP GRO packets */
+
+        const UDPLITE_SEND_CSCOV = 10; /* sender partial coverage (as sent)      */
+        const UDPLITE_RECV_CSCOV = 11; /* receiver partial coverage (threshold ) */
+    }
+}
+
+// UDP encapsulation type identifiers.
+//
+// NOTE(review): these are sequential values, not disjoint bit masks
+// (e.g. L2TPINUDP == ESPINUDP_NON_IKE | ESPINUDP), so bitwise combination or
+// truncation cannot distinguish them — confirm whether an enum is intended.
+bitflags! {
+    pub struct UdpEncapTypes: u8 {
+        const ZERO = 0;
+        const ESPINUDP_NON_IKE = 1;     // draft-ietf-ipsec-nat-t-ike-00/01
+        const ESPINUDP = 2;             // draft-ietf-ipsec-udp-encaps-06
+        const L2TPINUDP = 3;            // rfc2661
+        const GTP0 = 4;                 // GSM TS 09.60
+        const GTP1U = 5;                // 3GPP TS 29.060
+        const RXRPC = 6;
+        const ESPINTCP = 7;             // Yikes, this is really xfrm encap types.
+    }
+}
+
+// fn sock_set_option(
+//     &self,
+//     _socket: &mut udp::Socket,
+//     _level: SocketOptionsLevel,
+//     optname: PosixSocketOption,
+//     _optval: &[u8],
+// ) -> Result<(), SystemError> {
+//     use PosixSocketOption::*;
+//     use SystemError::*;
+
+//     if optname == SO_BINDTODEVICE {
+//         todo!("SO_BINDTODEVICE");
+//     }
+
+//     match optname {
+//         SO_TYPE => {}
+//         SO_PROTOCOL => {}
+//         SO_DOMAIN => {}
+//         SO_ERROR => {
+//             return Err(ENOPROTOOPT);
+//         }
+//         SO_TIMESTAMP_OLD => {}
+//         SO_TIMESTAMP_NEW => {}
+//         SO_TIMESTAMPNS_OLD => {}
+
+//         SO_TIMESTAMPING_OLD => {}
+
+//         SO_RCVTIMEO_OLD => {}
+
+//         SO_SNDTIMEO_OLD => {}
+
+//         // if define CONFIG_NET_RX_BUSY_POLL
+//         SO_BUSY_POLL | SO_PREFER_BUSY_POLL | SO_BUSY_POLL_BUDGET => {
+//             debug!("Unsupported socket option: {:?}", optname);
+//             return Err(ENOPROTOOPT);
+//         }
+//         // end if
+//         optname => {
+//             debug!("Unsupported socket option: {:?}", optname);
+//             return Err(ENOPROTOOPT);
+//         }
+//     }
+//     return Ok(());
+// }
+
+// fn udp_set_option(
+//     &self,
+//     level: SocketOptionsLevel,
+//     optname: usize,
+//     optval: &[u8],
+// ) -> Result<(), SystemError> {
+//     use PosixSocketOption::*;
+
+//     let so_opt_name =
+//         PosixSocketOption::try_from(optname as i32)
+//             .map_err(|_| SystemError::ENOPROTOOPT)?;
+
+//     if level == SocketOptionsLevel::SOL_SOCKET {
+//         self.with_mut_socket(f)
+//         self.sock_set_option(self., level, so_opt_name, optval)?;
+//         if so_opt_name == SO_RCVBUF || so_opt_name == SO_RCVBUFFORCE {
+//             todo!("SO_RCVBUF");
+//         }
+//     }
+
+//     match UdpSocketOptions::from_bits_truncate(optname as u32) {
+//         UdpSocketOptions::UDP_CORK => {
+//             todo!("UDP_CORK");
+//         }
+//         UdpSocketOptions::UDP_ENCAP => {
+//             match UdpEncapTypes::from_bits_truncate(optval[0]) {
+//                 UdpEncapTypes::ESPINUDP_NON_IKE => {
+//                     todo!("ESPINUDP_NON_IKE");
+//                 }
+//                 UdpEncapTypes::ESPINUDP => {
+//                     todo!("ESPINUDP");
+//                 }
+//                 UdpEncapTypes::L2TPINUDP => {
+//                     todo!("L2TPINUDP");
+//                 }
+//                 UdpEncapTypes::GTP0 => {
+//                     todo!("GTP0");
+//                 }
+//                 UdpEncapTypes::GTP1U => {
+//                     todo!("GTP1U");
+//                 }
+//                 UdpEncapTypes::RXRPC => {
+//                     todo!("RXRPC");
+//                 }
+//                 UdpEncapTypes::ESPINTCP => {
+//                     todo!("ESPINTCP");
+//                 }
+//                 UdpEncapTypes::ZERO => {}
+//                 _ => {
+//                     return Err(SystemError::ENOPROTOOPT);
+//                 }
+//             }
+//         }
+//         UdpSocketOptions::UDP_NO_CHECK6_TX => {
+//             todo!("UDP_NO_CHECK6_TX");
+//         }
+//         UdpSocketOptions::UDP_NO_CHECK6_RX => {
+//             todo!("UDP_NO_CHECK6_RX");
+//         }
+//         UdpSocketOptions::UDP_SEGMENT => {
+//             todo!("UDP_SEGMENT");
+//         }
+//         UdpSocketOptions::UDP_GRO => {
+//             todo!("UDP_GRO");
+//         }
+
+//         UdpSocketOptions::UDPLITE_RECV_CSCOV => {
+//             todo!("UDPLITE_RECV_CSCOV");
+//         }
+//         UdpSocketOptions::UDPLITE_SEND_CSCOV => {
+//             todo!("UDPLITE_SEND_CSCOV");
+//         }
+
+//         UdpSocketOptions::ZERO => {}
+//         _ => {
+//             return Err(SystemError::ENOPROTOOPT);
+//         }
+//     }
+//     return Ok(());
+// }

+ 68 - 0
kernel/src/net/socket/inet/ip_def.rs

@@ -0,0 +1,68 @@
+
+// IP-level (`SOL_IP`) socket option identifiers, numbered as in Linux
+// `include/uapi/linux/in.h`.
+//
+// Several names intentionally share a value: `IP_RECVRETOPTS` is an alias of
+// `IP_RETOPTS`, `IP_RECVORIGDSTADDR` is an alias of `IP_ORIGDSTADDR`, and the
+// `IP_PMTUDISC_*` constants are *values* of the `IP_MTU_DISCOVER` option rather
+// than option names, so they overlap the low option numbers.
+//
+// NOTE(review): these are sequential numbers, not disjoint bit masks, so
+// bitwise combination or `from_bits_truncate` cannot distinguish them.
+bitflags! {
+    pub struct IpOptions: u32 {
+        const IP_TOS = 1;                     // Type of service
+        const IP_TTL = 2;                     // Time to live
+        const IP_HDRINCL = 3;                 // IP header is included with the data
+        const IP_OPTIONS = 4;                 // IP options
+        const IP_ROUTER_ALERT = 5;            // Router alert
+        const IP_RECVOPTS = 6;                // Receive options
+        const IP_RETOPTS = 7;                 // Return options
+        const IP_PKTINFO = 8;                 // Packet information
+        const IP_PKTOPTIONS = 9;              // Packet options
+        const IP_MTU_DISCOVER = 10;           // MTU discovery
+        const IP_RECVERR = 11;                // Receive errors
+        const IP_RECVTTL = 12;                // Receive time to live
+        const IP_RECVTOS = 13;                // Receive type of service
+        const IP_MTU = 14;                    // MTU
+        const IP_FREEBIND = 15;               // Freebind
+        const IP_IPSEC_POLICY = 16;           // IPsec policy
+        const IP_XFRM_POLICY = 17;            // IPsec transform policy
+        const IP_PASSSEC = 18;                // Pass security
+        const IP_TRANSPARENT = 19;            // Transparent
+
+        const IP_RECVRETOPTS = 7;             // BSD compatibility alias of IP_RETOPTS
+
+        const IP_ORIGDSTADDR = 20;            // Original destination address (used by TProxy)
+        const IP_RECVORIGDSTADDR = 20;        // Alias of IP_ORIGDSTADDR
+
+        const IP_MINTTL = 21;                 // Minimum time to live
+        const IP_NODEFRAG = 22;               // Don't defragment
+        const IP_CHECKSUM = 23;               // Checksum
+        const IP_BIND_ADDRESS_NO_PORT = 24;   // Bind to address without reserving a port
+        const IP_RECVFRAGSIZE = 25;           // Receive fragment size
+        const IP_RECVERR_RFC4884 = 26;        // Receive RFC 4884 extended error information
+
+        // Values of the IP_MTU_DISCOVER option (not option names).
+        const IP_PMTUDISC_DONT = 0;           // Never send DF frames
+        const IP_PMTUDISC_WANT = 1;           // Use per-route hints
+        const IP_PMTUDISC_DO = 2;             // Always DF
+        const IP_PMTUDISC_PROBE = 3;          // Ignore dst pmtu
+        const IP_PMTUDISC_INTERFACE = 4;      // Always use interface mtu (ignores dst pmtu)
+        const IP_PMTUDISC_OMIT = 5;           // Weaker version of IP_PMTUDISC_INTERFACE
+
+        const IP_MULTICAST_IF = 32;           // Multicast interface
+        const IP_MULTICAST_TTL = 33;          // Multicast time to live
+        const IP_MULTICAST_LOOP = 34;         // Multicast loopback
+        const IP_ADD_MEMBERSHIP = 35;         // Add multicast group membership
+        const IP_DROP_MEMBERSHIP = 36;        // Drop multicast group membership
+        const IP_UNBLOCK_SOURCE = 37;         // Unblock source
+        const IP_BLOCK_SOURCE = 38;           // Block source
+        const IP_ADD_SOURCE_MEMBERSHIP = 39;  // Add source multicast group membership
+        const IP_DROP_SOURCE_MEMBERSHIP = 40; // Drop source multicast group membership
+        const IP_MSFILTER = 41;               // Multicast source filter
+
+        const MCAST_JOIN_GROUP = 42;          // Join a multicast group
+        const MCAST_BLOCK_SOURCE = 43;        // Block a multicast source
+        const MCAST_UNBLOCK_SOURCE = 44;      // Unblock a multicast source
+        const MCAST_LEAVE_GROUP = 45;         // Leave a multicast group
+        const MCAST_JOIN_SOURCE_GROUP = 46;   // Join a multicast source group
+        const MCAST_LEAVE_SOURCE_GROUP = 47;  // Leave a multicast source group
+        const MCAST_MSFILTER = 48;            // Multicast source filter
+
+        const IP_MULTICAST_ALL = 49;          // Multicast all
+        const IP_UNICAST_IF = 50;             // Unicast interface
+        const IP_LOCAL_PORT_RANGE = 51;       // Local port range
+        const IP_PROTOCOL = 52;               // Protocol
+
+        // ... other flags ...
+    }
+}

+ 150 - 0
kernel/src/net/socket/inet/mod.rs

@@ -0,0 +1,150 @@
+use alloc::sync::Arc;
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+// pub mod raw;
+// pub mod icmp;
+pub mod common;
+pub mod datagram;
+pub mod stream;
+pub mod syscall;
+
+pub use common::BoundInner;
+pub use common::Types;
+// pub use raw::RawSocket;
+pub use datagram::UdpSocket;
+pub use stream::TcpSocket;
+pub use syscall::Inet;
+
+use crate::filesystem::vfs::IndexNode;
+
+use super::Socket;
+
+use smoltcp::wire::*;
+/// The endpoint used to represent a local address that has not been specified:
+/// the unspecified IPv4 address with port 0.
+///
+/// According to the Linux man pages and the Linux implementation, `getsockname()` will _not_ fail
+/// even if the socket is unbound. Instead, it will return an unspecified socket address. This
+/// unspecified endpoint helps with that.
+const UNSPECIFIED_LOCAL_ENDPOINT: IpEndpoint =
+    IpEndpoint::new(IpAddress::Ipv4(Ipv4Address::UNSPECIFIED), 0);
+
+/// Extension of [`Socket`] for sockets that receive interface event notifications.
+pub trait InetSocket: Socket {
+    /// `on_iface_events`
+    /// Notifies the socket of events that have occurred.
+    fn on_iface_events(&self);
+}
+
+// #[derive(Debug)]
+// pub enum InetSocket {
+//     // Raw(RawSocket),
+//     Udp(UdpSocket),
+//     Tcp(TcpSocket),
+// }
+
+// impl InetSocket {
+//     /// # `on_iface_events`
+//     /// 通知socket发生了事件
+//     pub fn on_iface_events(&self) {
+//         todo!()
+//     }
+// }
+
+// impl IndexNode for InetSocket {
+
+// }
+
+// impl Socket for InetSocket {
+//     fn epoll_items(&self) -> &super::common::poll_unit::EPollItems {
+//         match self {
+//             InetSocket::Udp(udp) => udp.epoll_items(),
+//             InetSocket::Tcp(tcp) => tcp.epoll_items(),
+//         }
+//     }
+
+//     fn bind(&self, endpoint: crate::net::Endpoint) -> Result<(), SystemError> {
+//         if let crate::net::Endpoint::Ip(ip) = endpoint {
+//             match self {
+//                 InetSocket::Udp(udp) => {
+//                     udp.do_bind(ip)?;
+//                 },
+//                 InetSocket::Tcp(tcp) => {
+//                     tcp.do_bind(ip)?;
+//                 },
+//             }
+//             return Ok(());
+//         }
+//         return Err(EINVAL);
+//     }
+
+//     fn wait_queue(&self) -> &super::common::poll_unit::WaitQueue {
+//         todo!()
+//     }
+
+//     fn on_iface_events(&self) {
+//         todo!()
+//     }
+// }
+
+// pub trait Socket: FileLike + Send + Sync {
+//     /// Assign the address specified by socket_addr to the socket
+//     fn bind(&self, _socket_addr: SocketAddr) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "bind() is not supported");
+//     }
+
+//     /// Build connection for a given address
+//     fn connect(&self, _socket_addr: SocketAddr) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "connect() is not supported");
+//     }
+
+//     /// Listen for connections on a socket
+//     fn listen(&self, _backlog: usize) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "listen() is not supported");
+//     }
+
+//     /// Accept a connection on a socket
+//     fn accept(&self) -> Result<(Arc<dyn FileLike>, SocketAddr)> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "accept() is not supported");
+//     }
+
+//     /// Shut down part of a full-duplex connection
+//     fn shutdown(&self, _cmd: SockShutdownCmd) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "shutdown() is not supported");
+//     }
+
+//     /// Get address of this socket.
+//     fn addr(&self) -> Result<SocketAddr> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "getsockname() is not supported");
+//     }
+
+//     /// Get address of peer socket
+//     fn peer_addr(&self) -> Result<SocketAddr> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "getpeername() is not supported");
+//     }
+
+//     /// Get options on the socket. The resulted option will put in the `option` parameter, if
+//     /// this method returns success.
+//     fn get_option(&self, _option: &mut dyn SocketOption) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "getsockopt() is not supported");
+//     }
+
+//     /// Set options on the socket.
+//     fn set_option(&self, _option: &dyn SocketOption) -> Result<()> {
+//         return_errno_with_message!(Errno::EOPNOTSUPP, "setsockopt() is not supported");
+//     }
+
+//     /// Sends a message on a socket.
+//     fn sendmsg(
+//         &self,
+//         io_vecs: &[IoVec],
+//         message_header: MessageHeader,
+//         flags: SendRecvFlags,
+//     ) -> Result<usize>;
+
+//     /// Receives a message from a socket.
+//     ///
+//     /// If successful, the `io_vecs` buffer will be filled with the received content.
+//     /// This method returns the length of the received message,
+//     /// and the message header.
+//     fn recvmsg(&self, io_vecs: &[IoVec], flags: SendRecvFlags) -> Result<(usize, MessageHeader)>;
+// }

+ 443 - 0
kernel/src/net/socket/inet/stream/inner.rs

@@ -0,0 +1,443 @@
+use core::sync::atomic::{AtomicU32, AtomicUsize};
+
+use crate::libs::rwlock::RwLock;
+use crate::net::socket::EPollEventType;
+use crate::net::socket::{self, inet::Types};
+use alloc::vec::Vec;
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+use super::inet::UNSPECIFIED_LOCAL_ENDPOINT;
+
+// NOTE(review): not referenced in the visible part of this file — confirm where it is used.
+pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
+// Length of the receive buffer allocated for every new TCP socket (512 KiB).
+pub const DEFAULT_RX_BUF_SIZE: usize = 512 * 1024;
+// Length of the transmit buffer allocated for every new TCP socket (512 KiB).
+pub const DEFAULT_TX_BUF_SIZE: usize = 512 * 1024;
+
+/// Creates a smoltcp TCP socket with zero-filled receive and transmit buffers
+/// of `DEFAULT_RX_BUF_SIZE` and `DEFAULT_TX_BUF_SIZE` elements.
+fn new_smoltcp_socket() -> smoltcp::socket::tcp::Socket<'static> {
+    use smoltcp::socket::tcp::{Socket, SocketBuffer};
+
+    let rx = SocketBuffer::new(vec![0; DEFAULT_RX_BUF_SIZE]);
+    let tx = SocketBuffer::new(vec![0; DEFAULT_TX_BUF_SIZE]);
+    Socket::new(rx, tx)
+}
+
+/// Creates a fresh smoltcp TCP socket that is already listening on
+/// `local_endpoint`.
+///
+/// # Panics
+/// If smoltcp rejects the `listen` call.
+fn new_listen_smoltcp_socket<T: Into<smoltcp::wire::IpListenEndpoint>>(
+    local_endpoint: T,
+) -> smoltcp::socket::tcp::Socket<'static> {
+    let mut listener = new_smoltcp_socket();
+    listener.listen(local_endpoint).unwrap();
+    listener
+}
+
+/// Initial state of a TCP socket, before it is connecting or listening.
+#[derive(Debug)]
+pub enum Init {
+    /// A bare smoltcp socket that has not been bound yet.
+    Unbound(smoltcp::socket::tcp::Socket<'static>),
+    /// A bound socket together with its local endpoint.
+    Bound((socket::inet::BoundInner, smoltcp::wire::IpEndpoint)),
+}
+
+impl Init {
+    /// Creates the unbound initial state around a freshly allocated smoltcp socket.
+    pub(super) fn new() -> Self {
+        Init::Unbound(new_smoltcp_socket())
+    }
+
+    /// Builds the bound initial state from a socket that is already bound,
+    /// reading its local endpoint from the socket itself.
+    ///
+    /// # Panics
+    /// If the socket has no local endpoint.
+    pub(super) fn new_bound(inner: socket::inet::BoundInner) -> Self {
+        let local = inner.with::<smoltcp::socket::tcp::Socket, _, _>(|tcp| {
+            tcp.local_endpoint()
+                .expect("A Bound Socket Must Have A Local Endpoint")
+        });
+        Init::Bound((inner, local))
+    }
+
+    /// Binds an unbound socket to `local_endpoint` and reserves its TCP port
+    /// in the port manager.
+    ///
+    /// # Errors
+    /// * `EINVAL` if the socket is already bound.
+    /// * Any error from `BoundInner::bind` or from reserving the port.
+    pub(super) fn bind(
+        self,
+        local_endpoint: smoltcp::wire::IpEndpoint,
+    ) -> Result<Self, SystemError> {
+        let socket = match self {
+            Init::Unbound(socket) => socket,
+            Init::Bound(_) => return Err(EINVAL),
+        };
+
+        let bound = socket::inet::BoundInner::bind(socket, &local_endpoint.addr)?;
+        bound
+            .port_manager()
+            .bind_port(Types::Tcp, local_endpoint.port)?;
+        // bound.iface().common().bind_socket()
+        Ok(Init::Bound((bound, local_endpoint)))
+    }
+
+    /// Binds an unbound socket to an ephemeral local endpoint suitable for
+    /// reaching `remote_endpoint`.
+    ///
+    /// On success returns the bound socket and its local endpoint.
+    ///
+    /// # Errors
+    /// * `(self, EINVAL)` if the socket is already bound.
+    /// * `(Init::new(), err)` if binding or reserving an ephemeral port fails;
+    ///   the original socket has been consumed, so a fresh one is handed back.
+    pub(super) fn bind_to_ephemeral(
+        self,
+        remote_endpoint: smoltcp::wire::IpEndpoint,
+    ) -> Result<(socket::inet::BoundInner, smoltcp::wire::IpEndpoint), (Self, SystemError)> {
+        let socket = match self {
+            Init::Unbound(socket) => socket,
+            Init::Bound(_) => return Err((self, EINVAL)),
+        };
+
+        let (bound, local_addr) =
+            match socket::inet::BoundInner::bind_ephemeral(socket, remote_endpoint.addr) {
+                Ok(pair) => pair,
+                Err(err) => return Err((Self::new(), err)),
+            };
+        let local_port = match bound.port_manager().bind_ephemeral_port(Types::Tcp) {
+            Ok(port) => port,
+            Err(err) => return Err((Self::new(), err)),
+        };
+        Ok((bound, smoltcp::wire::IpEndpoint::new(local_addr, local_port)))
+    }
+
+    /// Starts connecting to `remote_endpoint`, binding to an ephemeral local
+    /// endpoint first if the socket is still unbound.
+    ///
+    /// # Errors
+    /// * Whatever `bind_to_ephemeral` returns for an unbound socket.
+    /// * `EINVAL` if the local address is unspecified.
+    /// * `ECONNREFUSED` if smoltcp rejects the connect call.
+    ///
+    /// In the last two cases the bound socket is handed back as `Init::Bound`.
+    pub(super) fn connect(
+        self,
+        remote_endpoint: smoltcp::wire::IpEndpoint,
+    ) -> Result<Connecting, (Self, SystemError)> {
+        let (inner, local) = match self {
+            Init::Bound(pair) => pair,
+            Init::Unbound(_) => self.bind_to_ephemeral(remote_endpoint)?,
+        };
+
+        if local.addr.is_unspecified() {
+            return Err((Init::Bound((inner, local)), EINVAL));
+        }
+
+        let attempt = inner.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|tcp| {
+            tcp.connect(
+                inner.iface().smol_iface().lock().context(),
+                remote_endpoint,
+                local,
+            )
+            .map_err(|_| ECONNREFUSED)
+        });
+
+        if let Err(err) = attempt {
+            return Err((Init::Bound((inner, local)), err));
+        }
+        Ok(Connecting::new(inner))
+    }
+
+    /// # `listen`
+    ///
+    /// Turns a bound socket into a [`Listening`] set of `max(backlog, 1)`
+    /// listening sockets: `backlog - 1` freshly created ones plus the socket
+    /// that was already bound.
+    ///
+    /// A `backlog` of 0 is accepted and treated like 1 (only the already-bound
+    /// socket listens).
+    ///
+    /// # Errors
+    /// * `(self, EINVAL)` if the socket is unbound.
+    /// * `(Init::Bound(..), err)` if creating an extra listening socket fails.
+    /// * `(Init::Bound(..), ECONNREFUSED)` if smoltcp rejects the `listen` call
+    ///   on the already-bound socket.
+    pub(super) fn listen(self, backlog: usize) -> Result<Listening, (Self, SystemError)> {
+        let (inner, local) = match self {
+            Init::Unbound(_) => {
+                return Err((self, EINVAL));
+            }
+            Init::Bound(inner) => inner,
+        };
+        let listen_addr = if local.addr.is_unspecified() {
+            smoltcp::wire::IpListenEndpoint::from(local.port)
+        } else {
+            smoltcp::wire::IpListenEndpoint::from(local)
+        };
+        log::debug!("listen at {:?}", listen_addr);
+
+        let mut inners = Vec::new();
+        // The already-bound socket fills one slot, hence `backlog - 1` extra
+        // sockets. `saturating_sub` keeps `backlog == 0` from underflowing.
+        for _ in 0..backlog.saturating_sub(1) {
+            match socket::inet::BoundInner::bind(
+                new_listen_smoltcp_socket(listen_addr),
+                &local.addr,
+            ) {
+                Ok(new_listen) => inners.push(new_listen),
+                Err(err) => return Err((Init::Bound((inner, local)), err)),
+            }
+        }
+
+        if let Err(err) = inner.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+            socket.listen(listen_addr).map_err(|_| ECONNREFUSED)
+        }) {
+            return Err((Init::Bound((inner, local)), err));
+        }
+
+        inners.push(inner);
+        Ok(Listening {
+            inners,
+            connect: AtomicUsize::new(0),
+        })
+    }
+}
+
+/// Outcome of an in-progress TCP connect, as tracked by [`Connecting`].
+#[derive(Debug, Default, Clone, Copy)]
+enum ConnectResult {
+    /// The connection has been established.
+    Connected,
+    /// The connect attempt has not finished yet (the default).
+    #[default]
+    Connecting,
+    /// The connect attempt failed.
+    Refused,
+}
+
+/// A TCP socket on which a connect has been started.
+#[derive(Debug)]
+pub struct Connecting {
+    // The bound socket the connect was issued on.
+    inner: socket::inet::BoundInner,
+    // Latest known outcome; starts as `ConnectResult::Connecting`.
+    result: RwLock<ConnectResult>,
+}
+
+impl Connecting {
+    /// Wraps `inner` with the outcome initialised to `ConnectResult::Connecting`.
+    fn new(inner: socket::inet::BoundInner) -> Self {
+        let result = RwLock::new(ConnectResult::Connecting);
+        Self { inner, result }
+    }
+
+    /// Calls `f` with mutable access to the underlying smoltcp TCP socket and
+    /// returns whatever `f` returns.
+    pub fn with_mut<R, F: FnMut(&mut smoltcp::socket::tcp::Socket<'static>) -> R>(
+        &self,
+        f: F,
+    ) -> R {
+        self.inner.with_mut(f)
+    }
+
+    /// Consumes the connecting state and converts it according to the
+    /// recorded outcome:
+    ///
+    /// * still connecting → `Inner::Connecting` with `EAGAIN_OR_EWOULDBLOCK`;
+    /// * connected → `Inner::Established` with no error;
+    /// * refused → a bound `Inner::Init` with `ECONNREFUSED`.
+    pub fn into_result(self) -> (Inner, Option<SystemError>) {
+        let outcome = *self.result.read_irqsave();
+        match outcome {
+            ConnectResult::Connected => {
+                let established = Established { inner: self.inner };
+                (Inner::Established(established), None)
+            }
+            ConnectResult::Refused => {
+                let init = Init::new_bound(self.inner);
+                (Inner::Init(init), Some(ECONNREFUSED))
+            }
+            ConnectResult::Connecting => (Inner::Connecting(self), Some(EAGAIN_OR_EWOULDBLOCK)),
+        }
+    }
+
+    /// Returns `true` when `conn_result` becomes ready, which indicates that the caller should
+    /// invoke the `into_result()` method as soon as possible.
+    ///
+    /// Since `into_result()` needs to be called only once, this method will return `true`
+    /// _exactly_ once. The caller is responsible for not missing this event.
+    ///
+    /// The outcome is derived from the socket: able to send means connected,
+    /// still open means the attempt is in progress, anything else is treated
+    /// as refused.
+    #[must_use]
+    pub(super) fn update_io_events(&self) -> bool {
+        // Fast path: once an outcome has been recorded there is nothing new to
+        // report. Only the initial `Connecting` state needs the socket
+        // inspected; bailing out on `Connecting` instead would make it
+        // impossible for the outcome to ever be recorded.
+        if !matches!(*self.result.read_irqsave(), ConnectResult::Connecting) {
+            return false;
+        }
+
+        self.inner
+            .with_mut(|socket: &mut smoltcp::socket::tcp::Socket| {
+                let mut result = self.result.write_irqsave();
+                // Re-check under the write lock in case another caller
+                // recorded the outcome in the meantime.
+                if matches!(*result, ConnectResult::Refused | ConnectResult::Connected) {
+                    return false; // Already connected or refused
+                }
+
+                // Connected
+                if socket.can_send() {
+                    *result = ConnectResult::Connected;
+                    return true;
+                }
+                // Connecting
+                if socket.is_open() {
+                    return false;
+                }
+                // Refused
+                *result = ConnectResult::Refused;
+                true
+            })
+    }
+
+    /// Returns the local endpoint of the connecting socket.
+    ///
+    /// # Panics
+    /// If the socket has no local endpoint.
+    pub fn get_name(&self) -> smoltcp::wire::IpEndpoint {
+        let bound = &self.inner;
+        bound.with::<smoltcp::socket::tcp::Socket, _, _>(|tcp| {
+            tcp.local_endpoint()
+                .expect("A Connecting Tcp With No Local Endpoint")
+        })
+    }
+}
+
+/// State of a TCP socket that is listening for incoming connections.
+#[derive(Debug)]
+pub struct Listening {
+    /// The bound smoltcp sockets backing this listener.
+    inners: Vec<socket::inet::BoundInner>,
+    /// Index into `inners` of the socket that `update_io_events` last saw active.
+    connect: AtomicUsize,
+}
+
+impl Listening {
+    /// Hands out the connection sitting in the slot selected by `update_io_events`.
+    ///
+    /// Returns `EAGAIN_OR_EWOULDBLOCK` when that slot's socket is not active. On
+    /// success the slot is refilled with a fresh listening socket bound to the same
+    /// local endpoint, and the connected socket is returned with the peer endpoint.
+    pub fn accept(&mut self) -> Result<(Established, smoltcp::wire::IpEndpoint), SystemError> {
+        let index = self.connect.load(core::sync::atomic::Ordering::Relaxed);
+        let slot: &mut socket::inet::BoundInner = self.inners.get_mut(index).unwrap();
+
+        let active = slot.with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.is_active());
+        if !active {
+            return Err(EAGAIN_OR_EWOULDBLOCK);
+        }
+
+        let (local_endpoint, remote_endpoint) =
+            slot.with::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+                let local = socket
+                    .local_endpoint()
+                    .expect("A Connected Tcp With No Local Endpoint");
+                let remote = socket
+                    .remote_endpoint()
+                    .expect("A Connected Tcp With No Remote Endpoint");
+                (local, remote)
+            });
+
+        let mut handoff = socket::inet::BoundInner::bind(
+            new_listen_smoltcp_socket(local_endpoint),
+            &local_endpoint.addr,
+        )?;
+
+        // After the swap the slot holds the fresh listening socket and `handoff`
+        // holds the connected one.
+        // TODO is smoltcp socket swappable?
+        core::mem::swap(&mut handoff, slot);
+
+        Ok((Established { inner: handoff }, remote_endpoint))
+    }
+
+    /// Looks for the first active socket; if one exists, remembers its index and
+    /// raises `EPOLLIN` in `pollee`, otherwise clears `EPOLLIN`.
+    pub fn update_io_events(&self, pollee: &AtomicUsize) {
+        use core::sync::atomic::Ordering;
+
+        let first_active = self.inners.iter().position(|inner| {
+            inner.with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.is_active())
+        });
+
+        match first_active {
+            Some(index) => {
+                self.connect.store(index, Ordering::Relaxed);
+                pollee.fetch_or(EPollEventType::EPOLLIN.bits() as usize, Ordering::Relaxed);
+            }
+            None => {
+                pollee.fetch_and(!EPollEventType::EPOLLIN.bits() as usize, Ordering::Relaxed);
+            }
+        }
+    }
+
+    /// Returns the local endpoint of the first backing socket, or
+    /// `UNSPECIFIED_LOCAL_ENDPOINT` when it has none.
+    pub fn get_name(&self) -> smoltcp::wire::IpEndpoint {
+        self.inners[0].with::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+            match socket.local_endpoint() {
+                Some(name) => name,
+                None => UNSPECIFIED_LOCAL_ENDPOINT,
+            }
+        })
+    }
+}
+
+/// State of a TCP socket that has a connection, wrapping the bound smoltcp socket.
+#[derive(Debug)]
+pub struct Established {
+    inner: socket::inet::BoundInner,
+}
+
+impl Established {
+    /// Runs `f` with mutable access to the underlying smoltcp TCP socket.
+    pub fn with_mut<R, F: FnMut(&mut smoltcp::socket::tcp::Socket<'static>) -> R>(
+        &self,
+        f: F,
+    ) -> R {
+        self.inner.with_mut(f)
+    }
+
+    /// Runs `f` with shared access to the underlying smoltcp TCP socket.
+    pub fn with<R, F: Fn(&smoltcp::socket::tcp::Socket<'static>) -> R>(&self, f: F) -> R {
+        self.inner.with(f)
+    }
+
+    /// Closes the smoltcp socket and polls the interface it is bound to.
+    pub fn close(&self) {
+        self.inner
+            .with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.close());
+        self.inner.iface().poll();
+    }
+
+    /// Releases the bound socket (forwarded to `BoundInner::release`).
+    pub fn release(&self) {
+        self.inner.release();
+    }
+
+    /// Returns the local endpoint of the connection.
+    ///
+    /// # Panics
+    /// Panics if the smoltcp socket reports no local endpoint.
+    pub fn local_endpoint(&self) -> smoltcp::wire::IpEndpoint {
+        self.inner
+            .with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.local_endpoint())
+            .unwrap()
+    }
+
+    /// Returns the remote endpoint of the connection.
+    ///
+    /// # Panics
+    /// Panics if the smoltcp socket reports no remote endpoint.
+    pub fn remote_endpoint(&self) -> smoltcp::wire::IpEndpoint {
+        self.inner
+            .with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.remote_endpoint().unwrap())
+    }
+
+    /// Copies received bytes into `buf` and returns how many were copied.
+    ///
+    /// Returns `Ok(0)` when smoltcp reports the stream as finished, `ENOTCONN` when
+    /// the socket is in an invalid state for receiving, and `ENOBUFS` when the socket
+    /// can neither receive nor send.
+    pub fn recv_slice(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        self.inner
+            .with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+                use smoltcp::socket::tcp::RecvError::*;
+                // Reading must not depend on the transmit side alone: buffered data has
+                // to stay readable when the send buffer is full or the send half is
+                // closed. `can_send()` is kept so every previously accepted state
+                // behaves exactly as before.
+                if !(socket.can_recv() || socket.can_send()) {
+                    return Err(ENOBUFS);
+                }
+                match socket.recv_slice(buf) {
+                    Ok(size) => Ok(size),
+                    Err(InvalidState) => {
+                        log::error!("TcpSocket::try_recv: InvalidState");
+                        Err(ENOTCONN)
+                    }
+                    Err(Finished) => Ok(0),
+                }
+            })
+    }
+
+    /// Queues bytes from `buf` for transmission and returns how many were accepted.
+    ///
+    /// Returns `ENOBUFS` when the socket cannot send right now and `ECONNABORTED`
+    /// when smoltcp rejects the send.
+    pub fn send_slice(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        self.inner
+            .with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+                if !socket.can_send() {
+                    return Err(ENOBUFS);
+                }
+                socket.send_slice(buf).map_err(|_| ECONNABORTED)
+            })
+    }
+
+    /// Mirrors the socket's readiness into `pollee`: `EPOLLOUT` follows
+    /// `can_send()` and `EPOLLIN` follows `can_recv()`.
+    pub fn update_io_events(&self, pollee: &AtomicUsize) {
+        use core::sync::atomic::Ordering;
+
+        self.inner
+            .with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| {
+                if socket.can_send() {
+                    pollee.fetch_or(EPollEventType::EPOLLOUT.bits() as usize, Ordering::Relaxed);
+                } else {
+                    pollee.fetch_and(!EPollEventType::EPOLLOUT.bits() as usize, Ordering::Relaxed);
+                }
+                if socket.can_recv() {
+                    pollee.fetch_or(EPollEventType::EPOLLIN.bits() as usize, Ordering::Relaxed);
+                } else {
+                    pollee.fetch_and(!EPollEventType::EPOLLIN.bits() as usize, Ordering::Relaxed);
+                }
+            })
+    }
+}
+
+/// The state a TCP socket is currently in; each variant carries that state's data.
+#[derive(Debug)]
+pub enum Inner {
+    Init(Init),
+    Connecting(Connecting),
+    Listening(Listening),
+    Established(Established),
+}
+
+impl Inner {
+    /// Capacity of the send buffer: `DEFAULT_TX_BUF_SIZE` before a smoltcp socket is
+    /// in use, otherwise the capacity reported by the socket.
+    pub fn send_buffer_size(&self) -> usize {
+        match self {
+            Self::Init(_) => DEFAULT_TX_BUF_SIZE,
+            Self::Connecting(connecting) => connecting.with_mut(|sock| sock.send_capacity()),
+            Self::Listening(listening) => {
+                // only the first socket in the list is used for sending
+                let first = &listening.inners[0];
+                first.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|sock| sock.send_capacity())
+            }
+            Self::Established(established) => established.with_mut(|sock| sock.send_capacity()),
+        }
+    }
+
+    /// Capacity of the receive buffer: `DEFAULT_RX_BUF_SIZE` before a smoltcp socket
+    /// is in use, otherwise the capacity reported by the socket.
+    pub fn recv_buffer_size(&self) -> usize {
+        match self {
+            Self::Init(_) => DEFAULT_RX_BUF_SIZE,
+            Self::Connecting(connecting) => connecting.with_mut(|sock| sock.recv_capacity()),
+            Self::Listening(listening) => {
+                // only the first socket in the list is used for receiving
+                let first = &listening.inners[0];
+                first.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|sock| sock.recv_capacity())
+            }
+            Self::Established(established) => established.with_mut(|sock| sock.recv_capacity()),
+        }
+    }
+}

+ 485 - 0
kernel/src/net/socket/inet/stream/mod.rs

@@ -0,0 +1,485 @@
+use alloc::sync::{Arc, Weak};
+use core::sync::atomic::{AtomicBool, AtomicUsize};
+use system_error::SystemError::{self, *};
+
+use crate::libs::rwlock::RwLock;
+use crate::net::event_poll::EPollEventType;
+use crate::net::net_core::poll_ifaces;
+use crate::net::socket::*;
+use crate::sched::SchedMode;
+use inet::{InetSocket, UNSPECIFIED_LOCAL_ENDPOINT};
+use smoltcp;
+
+pub mod inner;
+use inner::*;
+
+type EP = EPollEventType;
+
+/// A TCP socket whose behaviour depends on the state stored in `inner`.
+#[derive(Debug)]
+pub struct TcpSocket {
+    /// Current state. The `Option` is only `None` transiently, while a state
+    /// transition holds the write lock and has taken the old state out.
+    inner: RwLock<Option<Inner>>,
+    shutdown: Shutdown,
+    /// Whether operations such as `accept` should return instead of waiting.
+    nonblock: AtomicBool,
+    epitems: EPollItems,
+    /// Queue that blocking operations wait on.
+    wait_queue: WaitQueue,
+    /// Weak handle to this socket, upgraded when registering with an interface.
+    self_ref: Weak<Self>,
+    /// Readiness bits (`EPollEventType` values) maintained by `update_events`.
+    pollee: AtomicUsize,
+}
+
+impl TcpSocket {
+    /// Builds a socket that starts in `state`, initially flagged readable and writable.
+    fn from_inner(state: Inner, nonblock: bool) -> Arc<Self> {
+        Arc::new_cyclic(|me| Self {
+            inner: RwLock::new(Some(state)),
+            shutdown: Shutdown::new(),
+            nonblock: AtomicBool::new(nonblock),
+            epitems: EPollItems::default(),
+            wait_queue: WaitQueue::default(),
+            self_ref: me.clone(),
+            pollee: AtomicUsize::new((EP::EPOLLIN.bits() | EP::EPOLLOUT.bits()) as usize),
+        })
+    }
+
+    /// Creates a fresh, unconnected socket.
+    pub fn new(nonblock: bool) -> Arc<Self> {
+        Self::from_inner(Inner::Init(Init::new()), nonblock)
+    }
+
+    /// Wraps an already established connection (as produced by `accept`).
+    pub fn new_established(inner: Established, nonblock: bool) -> Arc<Self> {
+        Self::from_inner(Inner::Established(inner), nonblock)
+    }
+
+    /// Returns the non-blocking flag.
+    pub fn is_nonblock(&self) -> bool {
+        self.nonblock.load(core::sync::atomic::Ordering::Relaxed)
+    }
+
+    /// Replaces the current state with the one returned by `f`.
+    ///
+    /// NOTE(review): `f` consumes the old state, so when it returns `Err` nothing is
+    /// put back and the slot stays `None`; the next state access would then panic.
+    /// Callers should only pass closures that cannot fail after taking ownership.
+    #[inline]
+    fn write_state<F>(&self, mut f: F) -> Result<(), SystemError>
+    where
+        F: FnMut(Inner) -> Result<Inner, SystemError>,
+    {
+        let mut inner_guard = self.inner.write();
+        let inner = inner_guard.take().expect("Tcp Inner is None");
+        let update = f(inner)?;
+        inner_guard.replace(update);
+        Ok(())
+    }
+
+    /// Binds the socket to `local_endpoint` and registers it with the interface.
+    ///
+    /// Returns `EINVAL` when the socket is not in the `Init` state, or the error
+    /// reported by `Init::bind`. The state slot is always refilled before returning.
+    pub fn do_bind(&self, local_endpoint: smoltcp::wire::IpEndpoint) -> Result<(), SystemError> {
+        let mut writer = self.inner.write();
+        match writer.take().expect("Tcp Inner is None") {
+            Inner::Init(init) => {
+                let bound = init.bind(local_endpoint).map_err(|err| {
+                    // `bind` consumed the previous state, so it cannot be restored.
+                    // Fall back to a fresh `Init` rather than leaving the slot empty,
+                    // which would panic on the next access.
+                    // NOTE(review): assumes a failed bind leaves nothing worth keeping.
+                    writer.replace(Inner::Init(Init::new()));
+                    err
+                })?;
+                if let Init::Bound((ref bound_inner, _)) = bound {
+                    bound_inner
+                        .iface()
+                        .common()
+                        .bind_socket(self.self_ref.upgrade().unwrap());
+                }
+                writer.replace(Inner::Init(bound));
+                Ok(())
+            }
+            other => {
+                // Not bindable in this state: put the state back untouched.
+                writer.replace(other);
+                Err(EINVAL)
+            }
+        }
+    }
+
+    /// Moves an `Init` socket into the `Listening` state.
+    ///
+    /// On failure the previous state is kept and the error is returned; any state
+    /// other than `Init` yields `EINVAL`.
+    pub fn do_listen(&self, backlog: usize) -> Result<(), SystemError> {
+        let mut writer = self.inner.write();
+        let current = writer.take().expect("Tcp Inner is None");
+        let (next, outcome) = match current {
+            Inner::Init(init) => match init.listen(backlog) {
+                Ok(listening) => (Inner::Listening(listening), Ok(())),
+                Err((init, err)) => (Inner::Init(init), Err(err)),
+            },
+            other => (other, Err(EINVAL)),
+        };
+        writer.replace(next);
+        drop(writer);
+
+        outcome
+    }
+
+    /// Polls the interfaces, then tries to take one pending connection.
+    ///
+    /// Returns `EINVAL` when the socket is not listening; errors from
+    /// `Listening::accept` (such as `EAGAIN_OR_EWOULDBLOCK`) are passed through.
+    pub fn try_accept(&self) -> Result<(Arc<TcpSocket>, smoltcp::wire::IpEndpoint), SystemError> {
+        poll_ifaces();
+        match self.inner.write().as_mut().expect("Tcp Inner is None") {
+            Inner::Listening(listening) => {
+                let (stream, remote) = listening.accept()?;
+                Ok((
+                    TcpSocket::new_established(stream, self.is_nonblock()),
+                    remote,
+                ))
+            }
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Starts connecting to `remote_endpoint` and polls the interfaces.
+    ///
+    /// NOTE(review): a freshly started attempt reports `EINPROGRESS` on *blocking*
+    /// sockets and success on non-blocking ones. POSIX `connect()` specifies
+    /// `EINPROGRESS` for non-blocking sockets, so this looks inverted — confirm
+    /// against the callers before changing it.
+    pub fn start_connect(
+        &self,
+        remote_endpoint: smoltcp::wire::IpEndpoint,
+    ) -> Result<(), SystemError> {
+        let mut writer = self.inner.write();
+        let current = writer.take().expect("Tcp Inner is None");
+        let (next, outcome) = match current {
+            Inner::Init(init) => match init.connect(remote_endpoint) {
+                Ok(connecting) => {
+                    let outcome = if self.is_nonblock() {
+                        Ok(())
+                    } else {
+                        Err(EINPROGRESS)
+                    };
+                    (Inner::Connecting(connecting), outcome)
+                }
+                Err((init, err)) => (Inner::Init(init), Err(err)),
+            },
+            Inner::Connecting(connecting) if self.is_nonblock() => {
+                (Inner::Connecting(connecting), Err(EALREADY))
+            }
+            Inner::Connecting(connecting) => (Inner::Connecting(connecting), Ok(())),
+            Inner::Listening(listening) => (Inner::Listening(listening), Err(EISCONN)),
+            Inner::Established(established) => (Inner::Established(established), Err(EISCONN)),
+        };
+        writer.replace(next);
+
+        // Release the state lock before polling, which may call back into this socket.
+        drop(writer);
+
+        poll_ifaces();
+
+        outcome
+    }
+
+    /// Resolves a `Connecting` socket into its next state via `Connecting::into_result`.
+    ///
+    /// Returns `EINVAL` (leaving the state untouched) when the socket is not
+    /// connecting, otherwise the error paired with the new state, if any.
+    pub fn finish_connect(&self) -> Result<(), SystemError> {
+        let mut writer = self.inner.write();
+        let conn = match writer.take().expect("Tcp Inner is None") {
+            Inner::Connecting(conn) => conn,
+            other => {
+                // Put the state back: leaving the slot empty would make every later
+                // operation on this socket panic.
+                writer.replace(other);
+                log::error!("TcpSocket::finish_connect: not Connecting");
+                return Err(EINVAL);
+            }
+        };
+
+        let (next, err) = conn.into_result();
+        writer.replace(next);
+        drop(writer);
+
+        match err {
+            Some(err) => Err(err),
+            None => Ok(()),
+        }
+    }
+
+    /// Reports whether a connection attempt has completed: `Ok` once established,
+    /// `EAGAIN_OR_EWOULDBLOCK` while connecting, `EINVAL` in any other state.
+    pub fn check_connect(&self) -> Result<(), SystemError> {
+        match self.inner.read().as_ref().expect("Tcp Inner is None") {
+            Inner::Connecting(_) => Err(EAGAIN_OR_EWOULDBLOCK),
+            Inner::Established(_) => Ok(()), // TODO check established
+            _ => Err(EINVAL),                // TODO socket error options
+        }
+    }
+
+    /// Polls the interfaces, then reads into `buf`; `EINVAL` unless established.
+    pub fn try_recv(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        poll_ifaces();
+        match self.inner.read().as_ref().expect("Tcp Inner is None") {
+            Inner::Established(established) => established.recv_slice(buf),
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Queues `buf` for sending, then polls the interfaces; `EINVAL` unless established.
+    pub fn try_send(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        match self.inner.read().as_ref().expect("Tcp Inner is None") {
+            Inner::Established(established) => {
+                let sent = established.send_slice(buf);
+                poll_ifaces();
+                sent
+            }
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Refreshes readiness for the current state.
+    ///
+    /// Returns `true` only when a `Connecting` socket reports that its outcome is
+    /// ready, in which case `finish_connect` should be called.
+    fn update_events(&self) -> bool {
+        match self.inner.read().as_ref().expect("Tcp Inner is None") {
+            Inner::Init(_) => false,
+            Inner::Connecting(connecting) => connecting.update_io_events(),
+            Inner::Established(established) => {
+                established.update_io_events(&self.pollee);
+                false
+            }
+            Inner::Listening(listening) => {
+                listening.update_io_events(&self.pollee);
+                false
+            }
+        }
+    }
+
+    /// Whether `EPOLLIN` is currently set; should only be called on accept.
+    fn is_acceptable(&self) -> bool {
+        EP::from_bits_truncate(self.poll() as u32).contains(EP::EPOLLIN)
+    }
+}
+
+impl Socket for TcpSocket {
+    /// The queue that blocking operations on this socket wait on.
+    fn wait_queue(&self) -> &WaitQueue {
+        &self.wait_queue
+    }
+
+    /// Local address of the socket; unbound sockets report `UNSPECIFIED_LOCAL_ENDPOINT`.
+    fn get_name(&self) -> Result<Endpoint, SystemError> {
+        let guard = self.inner.read();
+        let local = match guard.as_ref().expect("Tcp Inner is None") {
+            Inner::Init(Init::Unbound(_)) => UNSPECIFIED_LOCAL_ENDPOINT,
+            Inner::Init(Init::Bound((_, local))) => local.clone(),
+            Inner::Connecting(connecting) => connecting.get_name(),
+            Inner::Established(established) => established.local_endpoint(),
+            Inner::Listening(listening) => listening.get_name(),
+        };
+        Ok(Endpoint::Ip(local))
+    }
+
+    /// Binds to an IP endpoint; any other endpoint kind is `EINVAL`.
+    fn bind(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        match endpoint {
+            Endpoint::Ip(addr) => self.do_bind(addr),
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Starts connecting to an IP endpoint; any other endpoint kind is `EINVAL`.
+    fn connect(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        match endpoint {
+            Endpoint::Ip(addr) => self.start_connect(addr),
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Current readiness bits.
+    fn poll(&self) -> usize {
+        self.pollee.load(core::sync::atomic::Ordering::Relaxed)
+    }
+
+    fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        self.do_listen(backlog)
+    }
+
+    /// Accepts one connection. A non-blocking socket tries once; a blocking socket
+    /// waits on the wait queue and retries for as long as the attempt reports
+    /// `EAGAIN_OR_EWOULDBLOCK`.
+    fn accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        // could block io
+        let accepted = if self.is_nonblock() {
+            self.try_accept()
+        } else {
+            loop {
+                match self.try_accept() {
+                    Err(EAGAIN_OR_EWOULDBLOCK) => {
+                        wq_wait_event_interruptible!(self.wait_queue, self.is_acceptable(), {})?;
+                    }
+                    result => break result,
+                }
+            }
+        };
+        accepted.map(|(socket, remote)| (Inode::new(socket), Endpoint::Ip(remote)))
+    }
+
+    fn recv(&self, buffer: &mut [u8], _flags: MessageFlag) -> Result<usize, SystemError> {
+        self.try_recv(buffer)
+    }
+
+    fn send(&self, buffer: &[u8], _flags: MessageFlag) -> Result<usize, SystemError> {
+        self.try_send(buffer)
+    }
+
+    fn send_buffer_size(&self) -> usize {
+        let guard = self.inner.read();
+        guard
+            .as_ref()
+            .expect("Tcp Inner is None")
+            .send_buffer_size()
+    }
+
+    fn recv_buffer_size(&self) -> usize {
+        let guard = self.inner.read();
+        guard
+            .as_ref()
+            .expect("Tcp Inner is None")
+            .recv_buffer_size()
+    }
+
+    /// Closes and releases an established connection. `Init` and `Listening`
+    /// sockets need no action here; a socket that is still connecting reports
+    /// `EINPROGRESS`.
+    fn close(&self) -> Result<(), SystemError> {
+        match self.inner.read().as_ref().expect("Tcp Inner is None") {
+            Inner::Connecting(_) => Err(EINPROGRESS),
+            Inner::Established(established) => {
+                established.close();
+                established.release();
+                Ok(())
+            }
+            Inner::Init(_) | Inner::Listening(_) => Ok(()),
+        }
+    }
+}
+
+impl InetSocket for TcpSocket {
+    /// Refreshes readiness after interface activity and, when a pending connection
+    /// attempt has resolved, moves the socket into its next state.
+    fn on_iface_events(&self) {
+        if !self.update_events() {
+            return;
+        }
+        // TODO: set error — the outcome of the connect is currently discarded
+        let _result = self.finish_connect();
+    }
+}
+
+// #[derive(Debug)]
+// // #[cast_to([sync] IndexNode)]
+// struct TcpStream {
+//     inner: Established,
+//     shutdown: Shutdown,
+//     nonblock: AtomicBool,
+//     epitems: EPollItems,
+//     wait_queue: WaitQueue,
+//     self_ref: Weak<Self>,
+// }
+
+// impl TcpStream {
+//     pub fn is_nonblock(&self) -> bool {
+//         self.nonblock.load(core::sync::atomic::Ordering::Relaxed)
+//     }
+
+//     pub fn read(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+//         if self.nonblock.load(core::sync::atomic::Ordering::Relaxed) {
+//             return self.recv_slice(buf);
+//         } else {
+//             return self.wait_queue().busy_wait(
+//                 EP::EPOLLIN,
+//                 || self.recv_slice(buf)
+//             )
+//         }
+//     }
+
+//     pub fn recv_slice(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+//         let received = self.inner.recv_slice(buf);
+//         poll_ifaces();
+//         received
+//     }
+
+//     pub fn send_slice(&self, buf: &[u8]) -> Result<usize, SystemError> {
+//         let sent = self.inner.send_slice(buf);
+//         poll_ifaces();
+//         sent
+//     }
+// }
+
+// use crate::net::socket::{Inode, Socket};
+// use crate::filesystem::vfs::IndexNode;
+
+// impl IndexNode for TcpStream {
+//     fn read_at(
+//         &self,
+//         _offset: usize,
+//         _len: usize,
+//         buf: &mut [u8],
+//         data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+//     ) -> Result<usize, SystemError> {
+//         drop(data);
+//         self.read(buf)
+//     }
+
+//     fn write_at(
+//         &self,
+//         _offset: usize,
+//         _len: usize,
+//         buf: &[u8],
+//         data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+//     ) -> Result<usize, SystemError> {
+//         drop(data);
+//         self.send_slice(buf)
+//     }
+
+//     fn fs(&self) -> alloc::sync::Arc<dyn crate::filesystem::vfs::FileSystem> {
+//         todo!("TcpSocket::fs")
+//     }
+
+//     fn as_any_ref(&self) -> &dyn core::any::Any {
+//         self
+//     }
+
+//     fn list(&self) -> Result<alloc::vec::Vec<alloc::string::String>, SystemError> {
+//         todo!("TcpSocket::list")
+//     }
+
+// }
+
+// impl Socket for TcpStream {
+
+//     fn wait_queue(&self) -> WaitQueue {
+//         self.wait_queue.clone()
+//     }
+
+//     fn poll(&self) -> usize {
+//         // self.inner.with(|socket| {
+//         //     let mut mask = EPollEventType::empty();
+//         //     let shutdown = self.shutdown.get();
+//         //     let state = socket.state();
+//         //     use smoltcp::socket::tcp::State::*;
+//         //     type EP = crate::net::event_poll::EPollEventType;
+
+//         //     if shutdown.is_both_shutdown() || state == Closed {
+//         //         mask |= EP::EPOLLHUP;
+//         //     }
+
+//         //     if shutdown.is_recv_shutdown() {
+//         //         mask |= EP::EPOLLIN | EP::EPOLLRDNORM | EP::EPOLLRDHUP;
+//         //     }
+
+//         //     if state != SynSent && state != SynReceived {
+//         //         if socket.can_recv() {
+//         //             mask |= EP::EPOLLIN | EP::EPOLLRDNORM;
+//         //         }
+
+//         //         if !shutdown.is_send_shutdown() {
+//         //             // __sk_stream_is_writeable,这是一个内联函数,用于判断一个TCP套接字是否可写。
+//         //             //
+//         //             // 以下是函数的逐行解释:
+//         //             // static inline bool __sk_stream_is_writeable(const struct sock *sk, int wake)
+//         //             // - 这行定义了函数__sk_stream_is_writeable,它是一个内联函数(static inline),
+//         //             // 这意味着在调用点直接展开代码,而不是调用函数体。函数接收两个参数:
+//         //             // 一个指向struct sock对象的指针sk(代表套接字),和一个整型变量wake。
+//         //             //
+//         //             // return sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) &&
+//         //             // - 这行代码调用了sk_stream_wspace函数,获取套接字sk的可写空间(write space)大小。
+//         //             // 随后与sk_stream_min_wspace调用结果进行比较,该函数返回套接字为了保持稳定写入速度所需的
+//         //             // 最小可写空间。如果当前可写空间大于或等于最小可写空间,则表达式为真。
+//         //             //       __sk_stream_memory_free(sk, wake);
+//         //             // - 这行代码调用了__sk_stream_memory_free函数,它可能用于检查套接字的内存缓冲区是否
+//         //             // 有足够的空间可供写入数据。参数wake可能用于通知网络协议栈有数据需要发送,如果设置了相应的标志。
+//         //             // 综上所述,__sk_stream_is_writeable函数的目的是判断一个TCP套接字是否可以安全地进行写操作,
+//         //             // 它基于套接字的当前可写空间和所需的最小空间以及内存缓冲区的可用性。只有当这两个条件都满足时,
+//         //             // 函数才会返回true,表示套接字是可写的。
+//         //             if socket.can_send() {
+//         //                 mask |= EP::EPOLLOUT | EP::EPOLLWRNORM | EP::EPOLLWRBAND;
+//         //             } else {
+//         //                 todo!("TcpStream::poll: buffer space not enough");
+//         //             }
+//         //         } else {
+//         //             mask |= EP::EPOLLOUT | EP::EPOLLWRNORM;
+//         //         }
+//         //         // TODO tcp urg data => EPOLLPRI
+//         //     } else if state == SynSent /* inet_test_bit */ {
+//         //         log::warn!("Active TCP fastopen socket with defer_connect");
+//         //         mask |= EP::EPOLLOUT | EP::EPOLLWRNORM;
+//         //     }
+
+//         //     // TODO socket error
+//         //     return Ok(mask);
+//         // })
+//         self.pollee.load(core::sync::atomic::Ordering::Relaxed)
+//     }
+
+//     fn send_buffer_size(&self) -> usize {
+//         self.inner.with(|socket| socket.send_capacity())
+//     }
+
+//     fn recv_buffer_size(&self) -> usize {
+//         self.inner.with(|socket| socket.recv_capacity())
+//     }
+// }

+ 55 - 0
kernel/src/net/socket/inet/syscall.rs

@@ -0,0 +1,55 @@
+use alloc::sync::Arc;
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+use inet::{TcpSocket, UdpSocket};
+
+// use crate::net::syscall_util::SysArgSocketType;
+use crate::net::socket::*;
+
+/// Creates the inet socket matching `socket_type` and `protocol`.
+///
+/// `HopByHop` (protocol number 0) selects the default protocol of the socket type:
+/// UDP for datagram sockets and TCP for stream sockets. Every other combination,
+/// including raw sockets (not implemented yet), is rejected with `EPROTONOSUPPORT`.
+fn create_inet_socket(
+    socket_type: Type,
+    protocol: smoltcp::wire::IpProtocol,
+) -> Result<Arc<dyn Socket>, SystemError> {
+    log::debug!("type: {:?}, protocol: {:?}", socket_type, protocol);
+    use smoltcp::wire::IpProtocol::*;
+    use Type::*;
+    match socket_type {
+        Datagram => match protocol {
+            HopByHop | Udp => Ok(UdpSocket::new(false)),
+            _ => Err(EPROTONOSUPPORT),
+        },
+        Stream => match protocol {
+            HopByHop | Tcp => Ok(TcpSocket::new(false)),
+            _ => Err(EPROTONOSUPPORT),
+        },
+        // Raw sockets are not implemented yet. The request originates from
+        // userspace, so report an error instead of panicking the kernel.
+        Raw => Err(EPROTONOSUPPORT),
+        _ => Err(EPROTONOSUPPORT),
+    }
+}
+
+/// The inet (IP) socket family.
+pub struct Inet;
+impl family::Family for Inet {
+    /// Creates an inet socket of type `stype` for `protocol` and wraps it in an `Inode`.
+    ///
+    /// Returns `EPROTONOSUPPORT` when `protocol` does not fit in 8 bits or when the
+    /// type/protocol combination is not supported.
+    fn socket(stype: Type, protocol: u32) -> Result<Arc<Inode>, SystemError> {
+        // IP protocol numbers are 8 bits wide. Reject larger values instead of
+        // truncating them, which would alias e.g. 262 onto 6 (TCP).
+        if protocol > u32::from(u8::MAX) {
+            return Err(EPROTONOSUPPORT);
+        }
+        let socket = create_inet_socket(stype, smoltcp::wire::IpProtocol::from(protocol as u8))?;
+        Ok(Inode::new(socket))
+    }
+}

+ 195 - 0
kernel/src/net/socket/inode.rs

@@ -0,0 +1,195 @@
+use crate::filesystem::vfs::IndexNode;
+use alloc::sync::Arc;
+use system_error::SystemError;
+
+use crate::net::socket::*;
+
+/// VFS-facing wrapper around a socket: file operations are forwarded to `inner`.
+#[derive(Debug)]
+pub struct Inode {
+    /// The socket implementation backing this inode.
+    inner: Arc<dyn Socket>,
+    /// Epoll registrations attached to this inode.
+    epoll_items: EPollItems,
+}
+
+impl IndexNode for Inode {
+    /// Reads from the socket into `buf`; the offset and length are ignored.
+    fn read_at(
+        &self,
+        _offset: usize,
+        _len: usize,
+        buf: &mut [u8],
+        data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        // Release the private-data lock before calling into the socket.
+        drop(data);
+        self.inner.read(buf)
+    }
+
+    /// Writes `buf` to the socket; the offset and length are ignored.
+    fn write_at(
+        &self,
+        _offset: usize,
+        _len: usize,
+        buf: &[u8],
+        data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        // Release the private-data lock before calling into the socket.
+        drop(data);
+        self.inner.write(buf)
+    }
+
+    /* Following are not yet available in socket */
+    fn as_any_ref(&self) -> &dyn core::any::Any {
+        self
+    }
+
+    /* filesystem associate interfaces are about unix and netlink socket */
+    /// Not implemented: calling this panics.
+    fn fs(&self) -> Arc<dyn crate::filesystem::vfs::FileSystem> {
+        unimplemented!()
+    }
+
+    /// Not implemented: calling this panics.
+    fn list(&self) -> Result<alloc::vec::Vec<alloc::string::String>, SystemError> {
+        unimplemented!()
+    }
+
+    /// Returns the socket's readiness bits; the private data is not consulted.
+    fn poll(
+        &self,
+        private_data: &crate::filesystem::vfs::FilePrivateData,
+    ) -> Result<usize, SystemError> {
+        let _ = private_data;
+        let ready = self.inner.poll();
+        Ok(ready)
+    }
+
+    /// Opening a socket inode needs no work.
+    fn open(
+        &self,
+        _data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+        _mode: &crate::filesystem::vfs::file::FileMode,
+    ) -> Result<(), SystemError> {
+        Ok(())
+    }
+
+    /// Describes the inode as a socket with mode `0o755`; the reported size is the
+    /// socket's send buffer size.
+    fn metadata(&self) -> Result<crate::filesystem::vfs::Metadata, SystemError> {
+        use crate::filesystem::vfs::{syscall::ModeType, FileType, Metadata};
+
+        Ok(Metadata {
+            mode: ModeType::from_bits_truncate(0o755),
+            file_type: FileType::Socket,
+            size: self.send_buffer_size() as i64,
+            ..Default::default()
+        })
+    }
+
+    /// Closes the underlying socket.
+    fn close(
+        &self,
+        _data: crate::libs::spinlock::SpinLockGuard<crate::filesystem::vfs::FilePrivateData>,
+    ) -> Result<(), SystemError> {
+        self.inner.close()
+    }
+}
+
+impl Inode {
+    /// Wraps `inner` in a new inode with an empty epoll item set.
+    pub fn new(inner: Arc<dyn Socket>) -> Arc<Self> {
+        let epoll_items = EPollItems::default();
+        Arc::new(Self { inner, epoll_items })
+    }
+
+    /// Returns a new handle to the wrapped socket.
+    pub fn inner(&self) -> Arc<dyn Socket> {
+        self.inner.clone()
+    }
+
+    /// # `epoll_items`
+    /// The set of epoll events attached to this socket.
+    pub fn epoll_items(&self) -> EPollItems {
+        self.epoll_items.clone()
+    }
+
+    // ---- The methods below forward directly to the wrapped socket. ----
+
+    pub fn send_buffer_size(&self) -> usize {
+        self.inner.send_buffer_size()
+    }
+
+    pub fn recv_buffer_size(&self) -> usize {
+        self.inner.recv_buffer_size()
+    }
+
+    pub fn accept(&self) -> Result<(Arc<Self>, Endpoint), SystemError> {
+        self.inner.accept()
+    }
+
+    pub fn bind(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        self.inner.bind(endpoint)
+    }
+
+    pub fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        self.inner.listen(backlog)
+    }
+
+    pub fn connect(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        self.inner.connect(endpoint)
+    }
+
+    pub fn shutdown(&self, how: ShutdownTemp) -> Result<(), SystemError> {
+        self.inner.shutdown(how)
+    }
+
+    pub fn set_option(
+        &self,
+        level: OptionsLevel,
+        name: usize,
+        value: &[u8],
+    ) -> Result<(), SystemError> {
+        self.inner.set_option(level, name, value)
+    }
+
+    pub fn get_option(
+        &self,
+        level: OptionsLevel,
+        name: usize,
+        value: &mut [u8],
+    ) -> Result<usize, SystemError> {
+        self.inner.get_option(level, name, value)
+    }
+
+    pub fn send(&self, buffer: &[u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        self.inner.send(buffer, flags)
+    }
+
+    /// Sends `buffer` to `address`. Note that the wrapped socket takes the flags
+    /// before the address.
+    pub fn send_to(
+        &self,
+        buffer: &[u8],
+        address: Endpoint,
+        flags: MessageFlag,
+    ) -> Result<usize, SystemError> {
+        self.inner.send_to(buffer, flags, address)
+    }
+
+    pub fn recv(&self, buffer: &mut [u8], flags: MessageFlag) -> Result<usize, SystemError> {
+        self.inner.recv(buffer, flags)
+    }
+
+    // TODO receive from split with endpoint or not
+    pub fn recv_from(
+        &self,
+        buffer: &mut [u8],
+        flags: MessageFlag,
+        address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        self.inner.recv_from(buffer, flags, address)
+    }
+
+    pub fn get_name(&self) -> Result<Endpoint, SystemError> {
+        self.inner.get_name()
+    }
+
+    pub fn get_peer_name(&self) -> Result<Endpoint, SystemError> {
+        self.inner.get_peer_name()
+    }
+
+    // ---- Not supported yet: these only log a warning. ----
+
+    /// Currently a no-op apart from the warning; `nonblock` is ignored.
+    pub fn set_nonblock(&self, nonblock: bool) {
+        log::warn!("nonblock is not support yet");
+    }
+
+    /// Currently a no-op apart from the warning; `close_on_exec` is ignored.
+    pub fn set_close_on_exec(&self, close_on_exec: bool) {
+        log::warn!("close_on_exec is not support yet");
+    }
+}

+ 26 - 917
kernel/src/net/socket/mod.rs

@@ -1,919 +1,28 @@
-use core::{any::Any, fmt::Debug, sync::atomic::AtomicUsize};
-
-use alloc::{
-    boxed::Box,
-    collections::LinkedList,
-    string::String,
-    sync::{Arc, Weak},
-    vec::Vec,
-};
-use hashbrown::HashMap;
-use log::warn;
-use smoltcp::{
-    iface::SocketSet,
-    socket::{self, raw, tcp, udp},
-};
-use system_error::SystemError;
-
-use crate::{
-    arch::rand::rand,
-    filesystem::vfs::{
-        file::FileMode, syscall::ModeType, FilePrivateData, FileSystem, FileType, IndexNode,
-        Metadata,
-    },
-    libs::{
-        rwlock::{RwLock, RwLockWriteGuard},
-        spinlock::{SpinLock, SpinLockGuard},
-        wait_queue::EventWaitQueue,
-    },
-    process::{Pid, ProcessManager},
-    sched::{schedule, SchedMode},
-};
-
-use self::{
-    handle::GlobalSocketHandle,
-    inet::{RawSocket, TcpSocket, UdpSocket},
-    unix::{SeqpacketSocket, StreamSocket},
-};
-
-use super::{
-    event_poll::{EPollEventType, EPollItem, EventPoll},
-    Endpoint, Protocol, ShutdownType,
-};
-
-pub mod handle;
+mod base;
+mod buffer;
+mod common;
+mod define;
+mod endpoint;
+mod family;
 pub mod inet;
+mod inode;
+pub mod netlink;
 pub mod unix;
-
-lazy_static! {
-    /// 所有socket的集合
-    /// TODO: 优化这里,自己实现SocketSet!!!现在这样的话,不管全局有多少个网卡,每个时间点都只会有1个进程能够访问socket
-    pub static ref SOCKET_SET: SpinLock<SocketSet<'static >> = SpinLock::new(SocketSet::new(vec![]));
-    /// SocketHandle表,每个SocketHandle对应一个SocketHandleItem,
-    /// 注意!:在网卡中断中需要拿到这张表的🔓,在获取读锁时应该确保关中断避免死锁
-    pub static ref HANDLE_MAP: RwLock<HashMap<GlobalSocketHandle, SocketHandleItem>> = RwLock::new(HashMap::new());
-    /// 端口管理器
-    pub static ref PORT_MANAGER: PortManager = PortManager::new();
-}
-
-/* For setsockopt(2) */
-// See: linux-5.19.10/include/uapi/asm-generic/socket.h#9
-pub const SOL_SOCKET: u8 = 1;
-
-/// 根据地址族、socket类型和协议创建socket
-pub(super) fn new_socket(
-    address_family: AddressFamily,
-    socket_type: PosixSocketType,
-    protocol: Protocol,
-) -> Result<Box<dyn Socket>, SystemError> {
-    let socket: Box<dyn Socket> = match address_family {
-        AddressFamily::Unix => match socket_type {
-            PosixSocketType::Stream => Box::new(StreamSocket::new(SocketOptions::default())),
-            PosixSocketType::SeqPacket => Box::new(SeqpacketSocket::new(SocketOptions::default())),
-            _ => {
-                return Err(SystemError::EINVAL);
-            }
-        },
-        AddressFamily::INet => match socket_type {
-            PosixSocketType::Stream => Box::new(TcpSocket::new(SocketOptions::default())),
-            PosixSocketType::Datagram => Box::new(UdpSocket::new(SocketOptions::default())),
-            PosixSocketType::Raw => Box::new(RawSocket::new(protocol, SocketOptions::default())),
-            _ => {
-                return Err(SystemError::EINVAL);
-            }
-        },
-        _ => {
-            return Err(SystemError::EAFNOSUPPORT);
-        }
-    };
-
-    let handle_item = SocketHandleItem::new(Arc::downgrade(&socket.posix_item()));
-    HANDLE_MAP
-        .write_irqsave()
-        .insert(socket.socket_handle(), handle_item);
-    Ok(socket)
-}
-
-pub trait Socket: Sync + Send + Debug + Any {
-    /// @brief 从socket中读取数据,如果socket是阻塞的,那么直到读取到数据才返回
-    ///
-    /// @param buf 读取到的数据存放的缓冲区
-    ///
-    /// @return - 成功:(返回读取的数据的长度,读取数据的端点).
-    ///         - 失败:错误码
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint);
-
-    /// @brief 向socket中写入数据。如果socket是阻塞的,那么直到写入的数据全部写入socket中才返回
-    ///
-    /// @param buf 要写入的数据
-    /// @param to 要写入的目的端点,如果是None,那么写入的数据将会被丢弃
-    ///
-    /// @return 返回写入的数据的长度
-    fn write(&self, buf: &[u8], to: Option<Endpoint>) -> Result<usize, SystemError>;
-
-    /// @brief 对应于POSIX的connect函数,用于连接到指定的远程服务器端点
-    ///
-    /// It is used to establish a connection to a remote server.
-    /// When a socket is connected to a remote server,
-    /// the operating system will establish a network connection with the server
-    /// and allow data to be sent and received between the local socket and the remote server.
-    ///
-    /// @param endpoint 要连接的端点
-    ///
-    /// @return 返回连接是否成功
-    fn connect(&mut self, _endpoint: Endpoint) -> Result<(), SystemError>;
-
-    /// @brief 对应于POSIX的bind函数,用于绑定到本机指定的端点
-    ///
-    /// The bind() function is used to associate a socket with a particular IP address and port number on the local machine.
-    ///
-    /// @param endpoint 要绑定的端点
-    ///
-    /// @return 返回绑定是否成功
-    fn bind(&mut self, _endpoint: Endpoint) -> Result<(), SystemError> {
-        Err(SystemError::ENOSYS)
-    }
-
-    /// @brief 对应于 POSIX 的 shutdown 函数,用于关闭socket。
-    ///
-    /// shutdown() 函数用于启动网络连接的正常关闭。
-    /// 当在两个端点之间建立网络连接时,任一端点都可以通过调用其端点对象上的 shutdown() 函数来启动关闭序列。
-    /// 此函数向远程端点发送关闭消息以指示本地端点不再接受新数据。
-    ///
-    /// @return 返回是否成功关闭
-    fn shutdown(&mut self, _type: ShutdownType) -> Result<(), SystemError> {
-        Err(SystemError::ENOSYS)
-    }
-
-    /// @brief 对应于POSIX的listen函数,用于监听端点
-    ///
-    /// @param backlog 最大的等待连接数
-    ///
-    /// @return 返回监听是否成功
-    fn listen(&mut self, _backlog: usize) -> Result<(), SystemError> {
-        Err(SystemError::ENOSYS)
-    }
-
-    /// @brief 对应于POSIX的accept函数,用于接受连接
-    ///
-    /// @param endpoint 对端的端点
-    ///
-    /// @return 返回接受连接是否成功
-    fn accept(&mut self) -> Result<(Box<dyn Socket>, Endpoint), SystemError> {
-        Err(SystemError::ENOSYS)
-    }
-
-    /// @brief 获取socket的端点
-    ///
-    /// @return 返回socket的端点
-    fn endpoint(&self) -> Option<Endpoint> {
-        None
-    }
-
-    /// @brief 获取socket的对端端点
-    ///
-    /// @return 返回socket的对端端点
-    fn peer_endpoint(&self) -> Option<Endpoint> {
-        None
-    }
-
-    /// @brief
-    ///     The purpose of the poll function is to provide
-    ///     a non-blocking way to check if a socket is ready for reading or writing,
-    ///     so that you can efficiently handle multiple sockets in a single thread or event loop.
-    ///
-    /// @return (in, out, err)
-    ///
-    ///     The first boolean value indicates whether the socket is ready for reading. If it is true, then there is data available to be read from the socket without blocking.
-    ///     The second boolean value indicates whether the socket is ready for writing. If it is true, then data can be written to the socket without blocking.
-    ///     The third boolean value indicates whether the socket has encountered an error condition. If it is true, then the socket is in an error state and should be closed or reset
-    ///
-    fn poll(&self) -> EPollEventType {
-        EPollEventType::empty()
-    }
-
-    /// @brief socket的ioctl函数
-    ///
-    /// @param cmd ioctl命令
-    /// @param arg0 ioctl命令的第一个参数
-    /// @param arg1 ioctl命令的第二个参数
-    /// @param arg2 ioctl命令的第三个参数
-    ///
-    /// @return 返回ioctl命令的返回值
-    fn ioctl(
-        &self,
-        _cmd: usize,
-        _arg0: usize,
-        _arg1: usize,
-        _arg2: usize,
-    ) -> Result<usize, SystemError> {
-        Ok(0)
-    }
-
-    /// @brief 获取socket的元数据
-    fn metadata(&self) -> SocketMetadata;
-
-    fn box_clone(&self) -> Box<dyn Socket>;
-
-    /// @brief 设置socket的选项
-    ///
-    /// @param level 选项的层次
-    /// @param optname 选项的名称
-    /// @param optval 选项的值
-    ///
-    /// @return 返回设置是否成功, 如果不支持该选项,返回ENOSYS
-    fn setsockopt(
-        &self,
-        _level: usize,
-        _optname: usize,
-        _optval: &[u8],
-    ) -> Result<(), SystemError> {
-        warn!("setsockopt is not implemented");
-        Ok(())
-    }
-
-    fn socket_handle(&self) -> GlobalSocketHandle;
-
-    fn write_buffer(&self, _buf: &[u8]) -> Result<usize, SystemError> {
-        todo!()
-    }
-
-    fn as_any_ref(&self) -> &dyn Any;
-
-    fn as_any_mut(&mut self) -> &mut dyn Any;
-
-    fn add_epoll(&mut self, epitem: Arc<EPollItem>) -> Result<(), SystemError> {
-        let posix_item = self.posix_item();
-        posix_item.add_epoll(epitem);
-        Ok(())
-    }
-
-    fn remove_epoll(&mut self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        let posix_item = self.posix_item();
-        posix_item.remove_epoll(epoll)?;
-
-        Ok(())
-    }
-
-    fn clear_epoll(&mut self) -> Result<(), SystemError> {
-        let posix_item = self.posix_item();
-
-        for epitem in posix_item.epitems.lock_irqsave().iter() {
-            let epoll = epitem.epoll();
-
-            if let Some(epoll) = epoll.upgrade() {
-                EventPoll::ep_remove(&mut epoll.lock_irqsave(), epitem.fd(), None)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    fn close(&mut self);
-
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem>;
-}
-
-impl Clone for Box<dyn Socket> {
-    fn clone(&self) -> Box<dyn Socket> {
-        self.box_clone()
-    }
-}
-
-/// # Socket在文件系统中的inode封装
-#[derive(Debug)]
-pub struct SocketInode(SpinLock<Box<dyn Socket>>, AtomicUsize);
-
-impl SocketInode {
-    pub fn new(socket: Box<dyn Socket>) -> Arc<Self> {
-        Arc::new(Self(SpinLock::new(socket), AtomicUsize::new(0)))
-    }
-
-    #[inline]
-    pub fn inner(&self) -> SpinLockGuard<Box<dyn Socket>> {
-        self.0.lock()
-    }
-
-    pub unsafe fn inner_no_preempt(&self) -> SpinLockGuard<Box<dyn Socket>> {
-        self.0.lock_no_preempt()
-    }
-
-    fn do_close(&self) -> Result<(), SystemError> {
-        let prev_ref_count = self.1.fetch_sub(1, core::sync::atomic::Ordering::SeqCst);
-        if prev_ref_count == 1 {
-            // 最后一次关闭,需要释放
-            let mut socket = self.0.lock_irqsave();
-
-            if socket.metadata().socket_type == SocketType::Unix {
-                return Ok(());
-            }
-
-            if let Some(Endpoint::Ip(Some(ip))) = socket.endpoint() {
-                PORT_MANAGER.unbind_port(socket.metadata().socket_type, ip.port);
-            }
-
-            socket.clear_epoll()?;
-
-            HANDLE_MAP
-                .write_irqsave()
-                .remove(&socket.socket_handle())
-                .unwrap();
-            socket.close();
-        }
-
-        Ok(())
-    }
-}
-
-impl Drop for SocketInode {
-    fn drop(&mut self) {
-        for _ in 0..self.1.load(core::sync::atomic::Ordering::SeqCst) {
-            let _ = self.do_close();
-        }
-    }
-}
-
-impl IndexNode for SocketInode {
-    fn open(
-        &self,
-        _data: SpinLockGuard<FilePrivateData>,
-        _mode: &FileMode,
-    ) -> Result<(), SystemError> {
-        self.1.fetch_add(1, core::sync::atomic::Ordering::SeqCst);
-        Ok(())
-    }
-
-    fn close(&self, _data: SpinLockGuard<FilePrivateData>) -> Result<(), SystemError> {
-        self.do_close()
-    }
-
-    fn read_at(
-        &self,
-        _offset: usize,
-        len: usize,
-        buf: &mut [u8],
-        data: SpinLockGuard<FilePrivateData>,
-    ) -> Result<usize, SystemError> {
-        drop(data);
-        self.0.lock_no_preempt().read(&mut buf[0..len]).0
-    }
-
-    fn write_at(
-        &self,
-        _offset: usize,
-        len: usize,
-        buf: &[u8],
-        data: SpinLockGuard<FilePrivateData>,
-    ) -> Result<usize, SystemError> {
-        drop(data);
-        self.0.lock_no_preempt().write(&buf[0..len], None)
-    }
-
-    fn poll(&self, _private_data: &FilePrivateData) -> Result<usize, SystemError> {
-        let events = self.0.lock_irqsave().poll();
-        return Ok(events.bits() as usize);
-    }
-
-    fn fs(&self) -> Arc<dyn FileSystem> {
-        todo!()
-    }
-
-    fn as_any_ref(&self) -> &dyn Any {
-        self
-    }
-
-    fn list(&self) -> Result<Vec<String>, SystemError> {
-        return Err(SystemError::ENOTDIR);
-    }
-
-    fn metadata(&self) -> Result<Metadata, SystemError> {
-        let meta = Metadata {
-            mode: ModeType::from_bits_truncate(0o755),
-            file_type: FileType::Socket,
-            ..Default::default()
-        };
-
-        return Ok(meta);
-    }
-
-    fn resize(&self, _len: usize) -> Result<(), SystemError> {
-        return Ok(());
-    }
-}
-
-#[derive(Debug)]
-pub struct PosixSocketHandleItem {
-    /// socket的waitqueue
-    wait_queue: Arc<EventWaitQueue>,
-
-    pub epitems: SpinLock<LinkedList<Arc<EPollItem>>>,
-}
-
-impl PosixSocketHandleItem {
-    pub fn new(wait_queue: Option<Arc<EventWaitQueue>>) -> Self {
-        Self {
-            wait_queue: wait_queue.unwrap_or(Arc::new(EventWaitQueue::new())),
-            epitems: SpinLock::new(LinkedList::new()),
-        }
-    }
-    /// ## 在socket的等待队列上睡眠
-    pub fn sleep(&self, events: u64) {
-        unsafe {
-            ProcessManager::preempt_disable();
-            self.wait_queue.sleep_without_schedule(events);
-            ProcessManager::preempt_enable();
-        }
-        schedule(SchedMode::SM_NONE);
-    }
-
-    pub fn add_epoll(&self, epitem: Arc<EPollItem>) {
-        self.epitems.lock_irqsave().push_back(epitem)
-    }
-
-    pub fn remove_epoll(&self, epoll: &Weak<SpinLock<EventPoll>>) -> Result<(), SystemError> {
-        let is_remove = !self
-            .epitems
-            .lock_irqsave()
-            .extract_if(|x| x.epoll().ptr_eq(epoll))
-            .collect::<Vec<_>>()
-            .is_empty();
-
-        if is_remove {
-            return Ok(());
-        }
-
-        Err(SystemError::ENOENT)
-    }
-
-    /// ### 唤醒该队列上等待events的进程
-    ///
-    ///  ### 参数
-    /// - events: 发生的事件
-    ///
-    /// 需要注意的是,只要触发了events中的任意一件事件,进程都会被唤醒
-    pub fn wakeup_any(&self, events: u64) {
-        self.wait_queue.wakeup_any(events);
-    }
-}
-#[derive(Debug)]
-pub struct SocketHandleItem {
-    /// 对应的posix socket是否为listen的
-    pub is_posix_listen: bool,
-    /// shutdown状态
-    pub shutdown_type: RwLock<ShutdownType>,
-    pub posix_item: Weak<PosixSocketHandleItem>,
-}
-
-impl SocketHandleItem {
-    pub fn new(posix_item: Weak<PosixSocketHandleItem>) -> Self {
-        Self {
-            is_posix_listen: false,
-            shutdown_type: RwLock::new(ShutdownType::empty()),
-            posix_item,
-        }
-    }
-
-    pub fn shutdown_type(&self) -> ShutdownType {
-        *self.shutdown_type.read()
-    }
-
-    pub fn shutdown_type_writer(&mut self) -> RwLockWriteGuard<ShutdownType> {
-        self.shutdown_type.write_irqsave()
-    }
-
-    pub fn reset_shutdown_type(&self) {
-        *self.shutdown_type.write() = ShutdownType::empty();
-    }
-
-    pub fn posix_item(&self) -> Option<Arc<PosixSocketHandleItem>> {
-        self.posix_item.upgrade()
-    }
-}
-
-/// # TCP 和 UDP 的端口管理器。
-/// 如果 TCP/UDP 的 socket 绑定了某个端口,它会在对应的表中记录,以检测端口冲突。
-pub struct PortManager {
-    // TCP 端口记录表
-    tcp_port_table: SpinLock<HashMap<u16, Pid>>,
-    // UDP 端口记录表
-    udp_port_table: SpinLock<HashMap<u16, Pid>>,
-}
-
-impl PortManager {
-    pub fn new() -> Self {
-        return Self {
-            tcp_port_table: SpinLock::new(HashMap::new()),
-            udp_port_table: SpinLock::new(HashMap::new()),
-        };
-    }
-
-    /// @brief 自动分配一个相对应协议中未被使用的PORT,如果动态端口均已被占用,返回错误码 EADDRINUSE
-    pub fn get_ephemeral_port(&self, socket_type: SocketType) -> Result<u16, SystemError> {
-        // TODO: selects non-conflict high port
-
-        static mut EPHEMERAL_PORT: u16 = 0;
-        unsafe {
-            if EPHEMERAL_PORT == 0 {
-                EPHEMERAL_PORT = (49152 + rand() % (65536 - 49152)) as u16;
-            }
-        }
-
-        let mut remaining = 65536 - 49152; // 剩余尝试分配端口次数
-        let mut port: u16;
-        while remaining > 0 {
-            unsafe {
-                if EPHEMERAL_PORT == 65535 {
-                    EPHEMERAL_PORT = 49152;
-                } else {
-                    EPHEMERAL_PORT += 1;
-                }
-                port = EPHEMERAL_PORT;
-            }
-
-            // 使用 ListenTable 检查端口是否被占用
-            let listen_table_guard = match socket_type {
-                SocketType::Udp => self.udp_port_table.lock(),
-                SocketType::Tcp => self.tcp_port_table.lock(),
-                _ => panic!("{:?} cann't get a port", socket_type),
-            };
-            if listen_table_guard.get(&port).is_none() {
-                drop(listen_table_guard);
-                return Ok(port);
-            }
-            remaining -= 1;
-        }
-        return Err(SystemError::EADDRINUSE);
-    }
-
-    /// @brief 检测给定端口是否已被占用,如果未被占用则在 TCP/UDP 对应的表中记录
-    ///
-    /// TODO: 增加支持端口复用的逻辑
-    pub fn bind_port(&self, socket_type: SocketType, port: u16) -> Result<(), SystemError> {
-        if port > 0 {
-            let mut listen_table_guard = match socket_type {
-                SocketType::Udp => self.udp_port_table.lock(),
-                SocketType::Tcp => self.tcp_port_table.lock(),
-                _ => panic!("{:?} cann't bind a port", socket_type),
-            };
-            match listen_table_guard.get(&port) {
-                Some(_) => return Err(SystemError::EADDRINUSE),
-                None => listen_table_guard.insert(port, ProcessManager::current_pid()),
-            };
-            drop(listen_table_guard);
-        }
-        return Ok(());
-    }
-
-    /// @brief 在对应的端口记录表中将端口和 socket 解绑
-    /// should call this function when socket is closed or aborted
-    pub fn unbind_port(&self, socket_type: SocketType, port: u16) {
-        let mut listen_table_guard = match socket_type {
-            SocketType::Udp => self.udp_port_table.lock(),
-            SocketType::Tcp => self.tcp_port_table.lock(),
-            _ => {
-                return;
-            }
-        };
-        listen_table_guard.remove(&port);
-        drop(listen_table_guard);
-    }
-}
-
-/// @brief socket的类型
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum SocketType {
-    /// 原始的socket
-    Raw,
-    /// 用于Tcp通信的 Socket
-    Tcp,
-    /// 用于Udp通信的 Socket
-    Udp,
-    /// unix域的 Socket
-    Unix,
-}
-
-bitflags! {
-    /// @brief socket的选项
-    #[derive(Default)]
-    pub struct SocketOptions: u32 {
-        /// 是否阻塞
-        const BLOCK = 1 << 0;
-        /// 是否允许广播
-        const BROADCAST = 1 << 1;
-        /// 是否允许多播
-        const MULTICAST = 1 << 2;
-        /// 是否允许重用地址
-        const REUSEADDR = 1 << 3;
-        /// 是否允许重用端口
-        const REUSEPORT = 1 << 4;
-    }
-}
-
-#[derive(Debug, Clone)]
-/// @brief 在trait Socket的metadata函数中返回该结构体供外部使用
-pub struct SocketMetadata {
-    /// socket的类型
-    pub socket_type: SocketType,
-    /// 接收缓冲区的大小
-    pub rx_buf_size: usize,
-    /// 发送缓冲区的大小
-    pub tx_buf_size: usize,
-    /// 元数据的缓冲区的大小
-    pub metadata_buf_size: usize,
-    /// socket的选项
-    pub options: SocketOptions,
-}
-
-impl SocketMetadata {
-    fn new(
-        socket_type: SocketType,
-        rx_buf_size: usize,
-        tx_buf_size: usize,
-        metadata_buf_size: usize,
-        options: SocketOptions,
-    ) -> Self {
-        Self {
-            socket_type,
-            rx_buf_size,
-            tx_buf_size,
-            metadata_buf_size,
-            options,
-        }
-    }
-}
-
-/// @brief 地址族的枚举
-///
-/// 参考:https://code.dragonos.org.cn/xref/linux-5.19.10/include/linux/socket.h#180
-#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
-pub enum AddressFamily {
-    /// AF_UNSPEC 表示地址族未指定
-    Unspecified = 0,
-    /// AF_UNIX 表示Unix域的socket (与AF_LOCAL相同)
-    Unix = 1,
-    ///  AF_INET 表示IPv4的socket
-    INet = 2,
-    /// AF_AX25 表示AMPR AX.25的socket
-    AX25 = 3,
-    /// AF_IPX 表示IPX的socket
-    IPX = 4,
-    /// AF_APPLETALK 表示Appletalk的socket
-    Appletalk = 5,
-    /// AF_NETROM 表示AMPR NET/ROM的socket
-    Netrom = 6,
-    /// AF_BRIDGE 表示多协议桥接的socket
-    Bridge = 7,
-    /// AF_ATMPVC 表示ATM PVCs的socket
-    Atmpvc = 8,
-    /// AF_X25 表示X.25的socket
-    X25 = 9,
-    /// AF_INET6 表示IPv6的socket
-    INet6 = 10,
-    /// AF_ROSE 表示AMPR ROSE的socket
-    Rose = 11,
-    /// AF_DECnet Reserved for DECnet project
-    Decnet = 12,
-    /// AF_NETBEUI Reserved for 802.2LLC project
-    Netbeui = 13,
-    /// AF_SECURITY 表示Security callback的伪AF
-    Security = 14,
-    /// AF_KEY 表示Key management API
-    Key = 15,
-    /// AF_NETLINK 表示Netlink的socket
-    Netlink = 16,
-    /// AF_PACKET 表示Low level packet interface
-    Packet = 17,
-    /// AF_ASH 表示Ash
-    Ash = 18,
-    /// AF_ECONET 表示Acorn Econet
-    Econet = 19,
-    /// AF_ATMSVC 表示ATM SVCs
-    Atmsvc = 20,
-    /// AF_RDS 表示Reliable Datagram Sockets
-    Rds = 21,
-    /// AF_SNA 表示Linux SNA Project
-    Sna = 22,
-    /// AF_IRDA 表示IRDA sockets
-    Irda = 23,
-    /// AF_PPPOX 表示PPPoX sockets
-    Pppox = 24,
-    /// AF_WANPIPE 表示WANPIPE API sockets
-    WanPipe = 25,
-    /// AF_LLC 表示Linux LLC
-    Llc = 26,
-    /// AF_IB 表示Native InfiniBand address
-    /// 介绍:https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/9/html-single/configuring_infiniband_and_rdma_networks/index#understanding-infiniband-and-rdma_configuring-infiniband-and-rdma-networks
-    Ib = 27,
-    /// AF_MPLS 表示MPLS
-    Mpls = 28,
-    /// AF_CAN 表示Controller Area Network
-    Can = 29,
-    /// AF_TIPC 表示TIPC sockets
-    Tipc = 30,
-    /// AF_BLUETOOTH 表示Bluetooth sockets
-    Bluetooth = 31,
-    /// AF_IUCV 表示IUCV sockets
-    Iucv = 32,
-    /// AF_RXRPC 表示RxRPC sockets
-    Rxrpc = 33,
-    /// AF_ISDN 表示mISDN sockets
-    Isdn = 34,
-    /// AF_PHONET 表示Phonet sockets
-    Phonet = 35,
-    /// AF_IEEE802154 表示IEEE 802.15.4 sockets
-    Ieee802154 = 36,
-    /// AF_CAIF 表示CAIF sockets
-    Caif = 37,
-    /// AF_ALG 表示Algorithm sockets
-    Alg = 38,
-    /// AF_NFC 表示NFC sockets
-    Nfc = 39,
-    /// AF_VSOCK 表示vSockets
-    Vsock = 40,
-    /// AF_KCM 表示Kernel Connection Multiplexor
-    Kcm = 41,
-    /// AF_QIPCRTR 表示Qualcomm IPC Router
-    Qipcrtr = 42,
-    /// AF_SMC 表示SMC-R sockets.
-    /// reserve number for PF_SMC protocol family that reuses AF_INET address family
-    Smc = 43,
-    /// AF_XDP 表示XDP sockets
-    Xdp = 44,
-    /// AF_MCTP 表示Management Component Transport Protocol
-    Mctp = 45,
-    /// AF_MAX 表示最大的地址族
-    Max = 46,
-}
-
-impl TryFrom<u16> for AddressFamily {
-    type Error = SystemError;
-    fn try_from(x: u16) -> Result<Self, Self::Error> {
-        use num_traits::FromPrimitive;
-        return <Self as FromPrimitive>::from_u16(x).ok_or(SystemError::EINVAL);
-    }
-}
-
-/// @brief posix套接字类型的枚举(这些值与linux内核中的值一致)
-#[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
-pub enum PosixSocketType {
-    Stream = 1,
-    Datagram = 2,
-    Raw = 3,
-    Rdm = 4,
-    SeqPacket = 5,
-    Dccp = 6,
-    Packet = 10,
-}
-
-impl TryFrom<u8> for PosixSocketType {
-    type Error = SystemError;
-    fn try_from(x: u8) -> Result<Self, Self::Error> {
-        use num_traits::FromPrimitive;
-        return <Self as FromPrimitive>::from_u8(x).ok_or(SystemError::EINVAL);
-    }
-}
-
-/// ### 为socket提供无锁的poll方法
-///
-/// 因为在网卡中断中,需要轮询socket的状态,如果使用socket文件或者其inode来poll
-/// 在当前的设计,会必然死锁,所以引用这一个设计来解决,提供无🔓的poll
-pub struct SocketPollMethod;
-
-impl SocketPollMethod {
-    pub fn poll(socket: &socket::Socket, handle_item: &SocketHandleItem) -> EPollEventType {
-        let shutdown = handle_item.shutdown_type();
-        match socket {
-            socket::Socket::Udp(udp) => Self::udp_poll(udp, shutdown),
-            socket::Socket::Tcp(tcp) => Self::tcp_poll(tcp, shutdown, handle_item.is_posix_listen),
-            socket::Socket::Raw(raw) => Self::raw_poll(raw, shutdown),
-            _ => todo!(),
-        }
-    }
-
-    pub fn tcp_poll(
-        socket: &tcp::Socket,
-        shutdown: ShutdownType,
-        is_posix_listen: bool,
-    ) -> EPollEventType {
-        let mut events = EPollEventType::empty();
-        // debug!("enter tcp_poll! is_posix_listen:{}", is_posix_listen);
-        // 处理listen的socket
-        if is_posix_listen {
-            // 如果是listen的socket,那么只有EPOLLIN和EPOLLRDNORM
-            if socket.is_active() {
-                events.insert(EPollEventType::EPOLL_LISTEN_CAN_ACCEPT);
-            }
-
-            // debug!("tcp_poll listen socket! events:{:?}", events);
-            return events;
-        }
-
-        let state = socket.state();
-
-        if shutdown == ShutdownType::SHUTDOWN_MASK || state == tcp::State::Closed {
-            events.insert(EPollEventType::EPOLLHUP);
-        }
-
-        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
-            events.insert(
-                EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM | EPollEventType::EPOLLRDHUP,
-            );
-        }
-
-        // Connected or passive Fast Open socket?
-        if state != tcp::State::SynSent && state != tcp::State::SynReceived {
-            // socket有可读数据
-            if socket.can_recv() {
-                events.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
-            }
-
-            if !(shutdown.contains(ShutdownType::SEND_SHUTDOWN)) {
-                // 缓冲区可写(这里判断可写的逻辑好像跟linux不太一样)
-                if socket.send_queue() < socket.send_capacity() {
-                    events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
-                } else {
-                    // TODO:触发缓冲区已满的信号SIGIO
-                    todo!("A signal SIGIO that the buffer is full needs to be sent");
-                }
-            } else {
-                // 如果我们的socket关闭了SEND_SHUTDOWN,epoll事件就是EPOLLOUT
-                events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
-            }
-        } else if state == tcp::State::SynSent {
-            events.insert(EPollEventType::EPOLLOUT | EPollEventType::EPOLLWRNORM);
-        }
-
-        // socket发生错误
-        // TODO: 这里的逻辑可能有问题,需要进一步验证是否is_active()==false就代表socket发生错误
-        if !socket.is_active() {
-            events.insert(EPollEventType::EPOLLERR);
-        }
-
-        events
-    }
-
-    pub fn udp_poll(socket: &udp::Socket, shutdown: ShutdownType) -> EPollEventType {
-        let mut event = EPollEventType::empty();
-
-        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
-            event.insert(
-                EPollEventType::EPOLLRDHUP | EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM,
-            );
-        }
-        if shutdown.contains(ShutdownType::SHUTDOWN_MASK) {
-            event.insert(EPollEventType::EPOLLHUP);
-        }
-
-        if socket.can_recv() {
-            event.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
-        }
-
-        if socket.can_send() {
-            event.insert(
-                EPollEventType::EPOLLOUT
-                    | EPollEventType::EPOLLWRNORM
-                    | EPollEventType::EPOLLWRBAND,
-            );
-        } else {
-            // TODO: 缓冲区空间不够,需要使用信号处理
-            todo!()
-        }
-
-        return event;
-    }
-
-    pub fn raw_poll(socket: &raw::Socket, shutdown: ShutdownType) -> EPollEventType {
-        //debug!("enter raw_poll!");
-        let mut event = EPollEventType::empty();
-
-        if shutdown.contains(ShutdownType::RCV_SHUTDOWN) {
-            event.insert(
-                EPollEventType::EPOLLRDHUP | EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM,
-            );
-        }
-        if shutdown.contains(ShutdownType::SHUTDOWN_MASK) {
-            event.insert(EPollEventType::EPOLLHUP);
-        }
-
-        if socket.can_recv() {
-            //debug!("poll can recv!");
-            event.insert(EPollEventType::EPOLLIN | EPollEventType::EPOLLRDNORM);
-        } else {
-            //debug!("poll can not recv!");
-        }
-
-        if socket.can_send() {
-            //debug!("poll can send!");
-            event.insert(
-                EPollEventType::EPOLLOUT
-                    | EPollEventType::EPOLLWRNORM
-                    | EPollEventType::EPOLLWRBAND,
-            );
-        } else {
-            //debug!("poll can not send!");
-            // TODO: 缓冲区空间不够,需要使用信号处理
-            todo!()
-        }
-        return event;
-    }
-}
+mod utils;
+
+use crate::libs::wait_queue::WaitQueue;
+pub use base::Socket;
+use buffer::Buffer;
+pub use common::{
+    shutdown::*,
+    // poll_unit::{EPollItems, WaitQueue},
+    EPollItems,
+};
+pub use define::*;
+pub use endpoint::*;
+pub use family::{AddressFamily, Family};
+pub use inode::Inode;
+pub use utils::create_socket;
+
+pub use crate::net::event_poll::EPollEventType;
+// pub use crate::net::sys

+ 1268 - 0
kernel/src/net/socket/netlink/af_netlink.rs

@@ -0,0 +1,1268 @@
+// 参考https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c
+use core::cmp::{max, min};
+use core::ops::{Deref, DerefMut};
+use core::{any::Any, fmt::Debug, hash::Hash};
+use core::{mem, slice};
+
+use alloc::string::String;
+use alloc::sync::{Arc, Weak};
+
+use hashbrown::HashMap;
+use intertrait::cast::CastBox;
+use intertrait::CastFromSync;
+use log::warn;
+use netlink::callback;
+use netlink::netlink::{sk_data_ready, NetlinkKernelCfg};
+use num::Zero;
+use system_error::SystemError;
+use unified_init::macros::unified_init;
+
+use crate::filesystem::vfs::{FilePrivateData, FileSystem, IndexNode};
+use crate::libs::mutex::Mutex;
+use crate::libs::rwlock::RwLockWriteGuard;
+use crate::libs::spinlock::{SpinLock, SpinLockGuard};
+use crate::net::event_poll::{EPollEventType, EPollItem, EventPoll};
+use crate::net::socket::netlink::skbuff::SkBuff;
+use crate::net::socket::*;
+use crate::net::syscall::{MsgHdr, SockAddr, SockAddrNl};
+use crate::time::timer::schedule_timeout;
+use crate::{libs::rwlock::RwLock, syscall::Syscall};
+use alloc::{boxed::Box, vec::Vec};
+use system_error::SystemError::ECONNREFUSED;
+
+use crate::net::socket::{AddressFamily, Endpoint, Inode, MessageFlag, Socket};
+use lazy_static::lazy_static;
+
+use super::callback::NetlinkCallback;
+use super::endpoint::NetlinkEndpoint;
+use super::netlink::{
+    NLmsgFlags, NLmsgType, NLmsghdr, VecExt, NETLINK_USERSOCK, NL_CFG_F_NONROOT_SEND,
+};
+use super::netlink_proto::{proto_register, Proto, NETLINK_PROTO};
+use super::skbuff::{netlink_overrun, skb_orphan, skb_shared};
+use super::sock::SockFlags;
+use crate::init::initcall::INITCALL_CORE;
+use crate::net::socket::netlink::netlink::NetlinkState;
+// Flag bits for a netlink socket; `NetlinkSock::is_kernel` tests them against
+// `NetlinkSock::flags`.
+// NOTE(review): `KERNEL_SOCKET` (0x1) and `NETLINK_F_KERNEL_SOCKET` (0x100) both look like
+// "kernel socket" markers, and `is_kernel` only checks the 0x100 bit — confirm which one is
+// intended.
+bitflags! {
+    pub struct NetlinkFlags: u32 {
+        const KERNEL_SOCKET = 0x1;
+        const RECV_PKTINFO = 0x2;
+        const BROADCAST_SEND_ERROR = 0x4;
+        const RECV_NO_ENOBUFS = 0x8;
+        const LISTEN_ALL_NSID = 0x10;
+        const CAP_ACK = 0x20;
+        const EXT_ACK = 0x40;
+        const STRICT_CHK = 0x80;
+        const NETLINK_F_KERNEL_SOCKET = 0x100;
+    }
+}
+/// Head of a singly linked list of netlink sockets.
+///
+/// `first` points at the first node; `None` means the list has no nodes.
+#[derive(Clone, Debug)]
+pub struct HListHead {
+    first: Option<Arc<HListNode>>,
+}
+/// One node of an [`HListHead`] list: a shared netlink socket plus the link to the next node.
+#[derive(Debug)]
+pub struct HListNode {
+    // Socket carried by this node.
+    data: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    // Following node, or `None` at the tail of the list.
+    next: Option<Arc<HListNode>>,
+}
+impl HListHead {
+    /// Returns an iterator over the sockets stored in the list, starting at the first node.
+    fn iter(&self) -> HListHeadIter {
+        let current = self.first.as_ref();
+        HListHeadIter { current }
+    }
+}
+
+/// Borrowing iterator over an [`HListHead`]; `current` is the node that will be yielded next.
+struct HListHeadIter<'a> {
+    current: Option<&'a Arc<HListNode>>,
+}
+
+impl<'a> Iterator for HListHeadIter<'a> {
+    type Item = &'a Arc<Mutex<Box<dyn NetlinkSocket>>>;
+
+    /// Yields the socket of the current node and advances to its successor; returns `None`
+    /// once the end of the list has been reached.
+    fn next(&mut self) -> Option<Self::Item> {
+        let node = self.current?;
+        self.current = node.next.as_ref();
+        Some(&node.data)
+    }
+}
+/// Every netlink protocol family has one `NetlinkTable`, which stores all netlink sockets
+/// belonging to that family.
+pub struct NetlinkTable {
+    // Sockets of this protocol keyed by port id (filled by `netlink_insert`).
+    hash: HashMap<u32, Arc<Mutex<Box<dyn NetlinkSocket>>>>,
+    // Listener masks consulted by `netlink_has_listeners`.
+    listeners: Option<Listeners>,
+    registered: u32,
+    flags: u32,
+    groups: u32,
+    // Sockets that `netlink_broadcast` walks when delivering a multicast message.
+    mc_list: HListHead,
+    // Optional callbacks copied from a `NetlinkKernelCfg` by `set_callbacks`.
+    pub bind: Option<Arc<dyn Fn(i32) -> i32 + Send + Sync>>,
+    pub unbind: Option<Arc<dyn Fn(i32) -> i32 + Send + Sync>>,
+    pub compare: Option<Arc<dyn Fn(&NetlinkSock) -> bool + Send + Sync>>,
+}
+impl NetlinkTable {
+    /// Creates an empty table: no sockets, an empty listener mask list, all counters and
+    /// flags zeroed and no callbacks installed.
+    fn new() -> NetlinkTable {
+        NetlinkTable {
+            hash: HashMap::new(),
+            listeners: Some(Listeners { masks: Vec::new() }),
+            registered: 0,
+            flags: 0,
+            groups: 0,
+            mc_list: HListHead { first: None },
+            bind: None,
+            unbind: None,
+            compare: None,
+        }
+    }
+    /// Returns a fresh, empty listener collection.
+    ///
+    /// NOTE(review): this does not expose the `listeners` field (the types differ), so it
+    /// is still a placeholder.
+    fn listeners(&self) -> RCuListeners {
+        RCuListeners::new()
+    }
+    /// Returns the flags stored by [`NetlinkTable::set_flags`].
+    fn flags(&self) -> u32 {
+        self.flags
+    }
+    /// Returns the group count stored by [`NetlinkTable::set_groups`].
+    fn groups(&self) -> u32 {
+        self.groups
+    }
+    /// Sets the `registered` value of this protocol entry.
+    pub fn set_registered(&mut self, registered: u32) {
+        self.registered = registered;
+    }
+    /// Sets the flags of this protocol entry.
+    pub fn set_flags(&mut self, flags: u32) {
+        self.flags = flags;
+    }
+    /// Sets the number of multicast groups of this protocol entry.
+    pub fn set_groups(&mut self, groups: u32) {
+        self.groups = groups;
+    }
+    /// Returns the `registered` value of this protocol entry.
+    pub fn get_registered(&self) -> u32 {
+        self.registered
+    }
+    /// Installs the `bind`, `unbind` and `compare` callbacks carried by `cfg`.
+    fn set_callbacks(&mut self, cfg: NetlinkKernelCfg) {
+        self.bind = cfg.bind;
+        self.unbind = cfg.unbind;
+        self.compare = cfg.compare;
+    }
+}
+
+/// A [`NetlinkTable`] guarded by a read-write lock.
+pub struct LockedNetlinkTable(RwLock<NetlinkTable>);
+
+impl LockedNetlinkTable {
+    /// Wraps `netlinktable` in a new read-write lock.
+    pub fn new(netlinktable: NetlinkTable) -> LockedNetlinkTable {
+        Self(RwLock::new(netlinktable))
+    }
+}
+// You would need to implement the actual methods for the traits and the bind/unbind functions.
+/// Receiver of raw netlink messages.
+trait NetlinkMessageHandler {
+    /// Handles one message; the default implementation does nothing.
+    fn handle_message(&mut self, msg: &[u8]) {
+        // Placeholder: no message handling is implemented yet.
+    }
+}
+
+/// Collection of message handlers that all receive every dispatched message.
+struct RCuListeners {
+    list: Vec<Box<dyn NetlinkMessageHandler>>,
+}
+
+impl RCuListeners {
+    /// Creates a collection without any handler.
+    fn new() -> Self {
+        let list = Vec::new();
+        Self { list }
+    }
+
+    /// Appends `listener` to the collection.
+    fn register(&mut self, listener: Box<dyn NetlinkMessageHandler>) {
+        self.list.push(listener);
+    }
+
+    /// Passes `msg` to every registered handler, in registration order.
+    fn handle_message(&mut self, msg: &[u8]) {
+        self.list
+            .iter_mut()
+            .for_each(|listener| listener.handle_message(msg));
+    }
+}
+
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#2916
+/// Maximum number of netlink protocols, i.e. the number of slots in `NL_TABLE`.
+const MAX_LINKS: usize = 32;
+#[unified_init(INITCALL_CORE)]
+/// Initialisation routine of the netlink protocol, run as a core initcall.
+///
+/// Registers `NETLINK_PROTO`, resets the hash map of every `NL_TABLE` slot, fills in the
+/// `NETLINK_USERSOCK` entry and registers the netlink family operations.
+///
+/// Returns `ENOSYS` when `proto_register` fails.
+///
+/// # Panics
+/// Panics when `NL_TABLE` is empty.
+fn netlink_proto_init() -> Result<(), SystemError> {
+    // NOTE(review): the unsafe block presumably exists because `NETLINK_PROTO` is a
+    // `static mut` — confirm nothing else accesses it while this initcall runs.
+    let registration = unsafe { proto_register(&mut NETLINK_PROTO, 0) };
+    if registration.is_err() {
+        return Err(SystemError::ENOSYS);
+    }
+    // The table holds one `NetlinkTable` per netlink protocol type; every netlink socket
+    // created later is stored in, and managed through, this table.
+    // Check that the table has been allocated as expected.
+    let mut nl_table = NL_TABLE.write();
+    // let mut nl_table = [0; MAX_LINKS];
+    if nl_table.is_empty() {
+        panic!("netlink_init: Cannot allocate nl_table");
+    }
+    // Initialise the hash map of every protocol slot.
+    for table in nl_table[..MAX_LINKS].iter_mut() {
+        table.hash = HashMap::new();
+    }
+    // Hand the write guard over instead of re-locking, which would block on the lock
+    // already held here.
+    netlink_add_usersock_entry(&mut nl_table);
+    // TODO: the functions below need net namespace support
+    sock_register(&NETLINK_FAMILY_OPS);
+    // register_pernet_subsys(&netlink_net_ops);
+    // register_pernet_subsys(&netlink_tap_net_ops);
+    /* The netlink device handler may be needed early. */
+    // rtnetlink_init();
+    Ok(())
+}
+
+/// Descriptor of the netlink address family passed to `sock_register`.
+pub struct NetlinkFamulyOps {
+    // Address family this descriptor stands for.
+    family: AddressFamily,
+    // owner: Module,
+}
+
+// impl NetProtoFamily for NetlinkFamulyOps {
+//     // https://code.dragonos.org.cn/s?refs=netlink_create&project=linux-6.1.9
+//     /// netlink_create() 用户空间创建一个netlink套接字
+//     fn create(socket: &mut dyn Socket, protocol: i32, _kern: bool) -> Result<(), Error> {
+//         // 假设我们有一个类型来跟踪协议最大值
+//         const MAX_LINKS: i32 = 1024;
+//         // if socket.type_ != SocketType::Raw && socket.type_ != SocketType::Dgram {
+//         //     return Err(Error::SocketTypeNotSupported);
+//         // }
+//         if !(0..MAX_LINKS).contains(&protocol) {
+//             // todo: 这里不符合规范,后续待修改为 SystemError
+//             return Err(Error::ProtocolNotSupported);
+//         }
+//         // 安全的数组索引封装
+//         let protocol = protocol as usize;
+//         Ok(())
+//     }
+// }
+
+lazy_static! {
+    /// Family descriptor for `AddressFamily::Netlink`, registered by `netlink_proto_init`.
+    static ref NETLINK_FAMILY_OPS: NetlinkFamulyOps = NetlinkFamulyOps {
+        family: AddressFamily::Netlink,
+    };
+}
+
+/// Placeholder for registering a socket family; the body is currently empty and `ops` is unused.
+pub fn sock_register(ops: &NetlinkFamulyOps) {}
+/// Initialises and registers the user-socket entry (`NETLINK_USERSOCK`) in the global
+/// netlink table.
+///
+/// `nl_table` is the write guard of `NL_TABLE` already held by the caller.
+///
+/// # Panics
+/// Panics when the listeners cannot be allocated, or when the table has no
+/// `NETLINK_USERSOCK` slot.
+pub fn netlink_add_usersock_entry(nl_table: &mut RwLockWriteGuard<Vec<NetlinkTable>>) {
+    let listeners: Option<Listeners> = Some(Listeners::new());
+    if listeners.is_none() {
+        panic!("netlink_add_usersock_entry: Cannot allocate listeners\n");
+    }
+    let group_count: u32 = 32;
+
+    let entry = &mut nl_table[NETLINK_USERSOCK];
+    entry.groups = group_count;
+    // rcu_assign_pointer(nl_table[index].listeners, listeners);
+    // nl_table[index].module = THIS_MODULE;
+    entry.registered = 1;
+    entry.flags = NL_CFG_F_NONROOT_SEND;
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#572
+/// Inserts a socket into the `NL_TABLE` slot of its protocol, keyed by `portid`.
+///
+/// ## Parameters
+/// - `sk`: socket to insert
+/// - `portid`: port id under which the socket becomes reachable
+///
+/// ## Errors
+/// - `EINVAL`: `sk` is not a `NetlinkSock`, or its protocol number has no slot in `NL_TABLE`
+/// - `EOVERFLOW`: the socket's current port id differs from `portid`
+///
+/// ## Notes
+/// The socket is locked once, only to read its protocol and port id, and that lock is
+/// released before the table write lock is taken, so the two locks are never nested.
+/// NOTE(review): the socket's own `portid`/`bound` fields are not updated here. The
+/// `NetlinkSocket` trait offers no setter, and the previous code only modified a temporary
+/// clone, which had no lasting effect.
+pub fn netlink_insert(
+    sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    portid: u32,
+) -> Result<(), SystemError> {
+    let (index, current_portid) = {
+        let guard = sk.lock();
+        let nlk = guard
+            .deref()
+            .as_any()
+            .downcast_ref::<NetlinkSock>()
+            .ok_or(SystemError::EINVAL)?;
+        (guard.sk_protocol(), nlk.portid)
+    };
+
+    // The requested port id must match the one recorded on the socket.
+    if current_portid != portid {
+        log::debug!("netlink_insert: portid mismatch\n");
+        return Err(SystemError::EOVERFLOW);
+    }
+
+    let mut nl_table = NL_TABLE.write();
+    // A protocol number outside the table is reported as an error instead of panicking.
+    let table = nl_table.get_mut(index).ok_or(SystemError::EINVAL)?;
+    table.hash.insert(portid, sk);
+    log::debug!("netlink_insert: inserted socket\n");
+
+    Ok(())
+}
+/// Binds a netlink socket to the address described by `addr`.
+///
+/// Modelled on `netlink_bind` in Linux 6.1.9 `net/netlink/af_netlink.c`; the multicast
+/// permission check, the per-group bind callback, autobind and the subscription update are
+/// still TODO.
+///
+/// ## Parameters
+/// - `sock`: socket to bind
+/// - `addr`: requested address (`nl_pid` is the port id, `nl_groups` the group bit mask)
+///
+/// ## Errors
+/// Returns `EINVAL` when `sock` cannot be downcast to `NetlinkSock`, when `addr` is not an
+/// `AddressFamily::Netlink` address, or when the socket is already bound to a different
+/// port id.
+///
+/// ## Notes
+/// A failing `netlink_insert` is logged but not returned, so callers see the same result
+/// as before.
+fn netlink_bind(
+    sock: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    addr: &SockAddrNl,
+) -> Result<(), SystemError> {
+    log::info!("netlink_bind here!");
+    let sk = Arc::clone(&sock);
+    // todo: net namespace support
+    // let net = sock_net(sk);
+    let nlk: Arc<NetlinkSock> = Arc::clone(&sk)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)?;
+    let nladdr = addr;
+    // Stays 0 until the per-group bind callback below is wired in.
+    let err: i32 = 0;
+
+    if nladdr.nl_family != AddressFamily::Netlink {
+        return Err(SystemError::EINVAL);
+    }
+    let mut groups: u32 = nladdr.nl_groups;
+
+    // Only superuser is allowed to listen multicasts
+    // if groups != 0 {
+    //     if !netlink_allowed(sock, NL_CFG_F_NONROOT_RECV) {
+    //         return Err(-EPERM);
+    //     }
+    //     err = netlink_realloc_groups(sk);
+    //     if err != 0 {
+    //         return Err(err);
+    //     }
+    // }
+
+    // Drop the bits of groups the socket does not have. `groups` is 32 bits wide, so the
+    // mask only matters (and the shift is only defined) for `ngroups < 32`.
+    if nlk.ngroups < u64::from(u32::BITS) {
+        groups &= (1u32 << nlk.ngroups) - 1;
+    }
+
+    let bound = nlk.bound;
+    // An already bound socket may only be re-bound to its current port id.
+    if bound && nladdr.nl_pid != nlk.portid {
+        return Err(SystemError::EINVAL);
+    }
+
+    if groups != 0 {
+        for group in 0..u32::BITS {
+            // Skip groups whose bit is not set in the requested mask.
+            if groups & (1 << group) == 0 {
+                continue;
+            }
+            // err = nlk.bind().unwrap()(group + 1);
+            if err == 0 {
+                continue;
+            }
+            // netlink_undo_bind(group, groups, sk);
+            return Err(SystemError::EINVAL);
+        }
+    }
+
+    // No need for barriers here as we return to user-space without
+    // using any of the bound attributes.
+    if !bound {
+        if nladdr.nl_pid != 0 {
+            if let Err(e) = netlink_insert(sk, nladdr.nl_pid) {
+                log::warn!("netlink_bind: netlink_insert failed: {:?}", e);
+            }
+        } else {
+            // todo
+            // netlink_autobind(sock)
+        };
+        if err != 0 {
+            // BITS_PER_TYPE<TYPE> = SIZEOF TYPE * BITS PER BYTES
+            // todo
+            // netlink_undo_bind(mem::size_of::<u32>() * 8, groups, sk);
+            // netlink_unlock_table();
+            return Err(SystemError::EINVAL);
+        }
+    }
+
+    // todo
+    // netlink_update_subscriptions(sk, nlk.subscriptions + hweight32(groups) - hweight32(nlk.groups.unwrap()[0]));
+    // nlk.groups.unwrap()[0] = (nlk.groups.unwrap()[0] & !0xffffffff) | groups;
+    // netlink_update_listeners(sk);
+
+    Ok(())
+}
+
+// TODO: net namespace support
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#532
+/// Looks up a netlink socket in `NL_TABLE` by protocol and port id.
+///
+/// # Panics
+/// Panics when `protocol` has no slot in the table, or when no socket is registered under
+/// `portid`.
+fn netlink_lookup(protocol: usize, portid: u32) -> Arc<Mutex<Box<dyn NetlinkSocket>>> {
+    // todo: net support
+    let nl_table = NL_TABLE.read();
+    let found = nl_table[protocol].hash.get(&portid).unwrap();
+    Arc::clone(found)
+}
+
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#672
+
+/// Socket-creation failure reasons.
+/// NOTE(review): within this part of the file the type is only referenced from
+/// commented-out code.
+pub enum Error {
+    SocketTypeNotSupported,
+    ProtocolNotSupported,
+}
+
+// Kernel-side abstraction specific to the netlink mechanism, distinct from the standard
+// `Socket` trait.
+pub trait NetlinkSocket: Socket + Any {
+    // fn sk_prot(&self) -> &dyn proto;
+    fn sk_family(&self) -> i32;
+    /// Current netlink state of the socket.
+    fn sk_state(&self) -> NetlinkState;
+    /// Protocol number; used as the index into `NL_TABLE`.
+    fn sk_protocol(&self) -> usize;
+    /// Receive-memory usage, compared against `sk_rcvbuf` before a broadcast is delivered.
+    fn sk_rmem_alloc(&self) -> usize;
+    /// Receive-buffer size.
+    fn sk_rcvbuf(&self) -> usize;
+    /// Appends `skb` to the socket's receive queue.
+    fn enqueue_skb(&mut self, skb: Arc<RwLock<SkBuff>>);
+    /// Whether this is a kernel socket.
+    fn is_kernel(&self) -> bool;
+    /// Whether `other` denotes the same socket as `self`.
+    fn equals(&self, other: Arc<Mutex<Box<dyn NetlinkSocket>>>) -> bool;
+    fn portid(&self) -> u32;
+    fn ngroups(&self) -> u64;
+    fn groups(&self) -> Vec<u64>;
+    fn flags(&self) -> Option<SockFlags>;
+    /// Send timeout to use; `noblock` selects the non-blocking case.
+    fn sock_sndtimeo(&self, noblock: bool) -> i64;
+    /// Upcast used for downcasting to the concrete socket type.
+    fn as_any(&self) -> &dyn Any;
+}
+
+/// Extension for sockets that accept a "data ready" callback.
+pub trait NetlinkSocketWithCallback {
+    /// Takes `callback` as the data-ready notification hook.
+    /// NOTE(review): when and with which argument the callback is invoked is not visible
+    /// here — confirm against the implementations.
+    fn sk_data_ready(&self, callback: impl Fn(i32) -> i32);
+}
+/* linux:struct sock has to be the first member of netlink_sock */
+// linux 6.1.9中的netlink_sock结构体里,sock是一个很大的结构体,这里简化
+// 意义是:netlink_sock(NetlinkSock)是一个sock(NetlinkSocket), 实现了 Netlinksocket trait 和 Sock trait.
+
+/// Metadata of a netlink socket; currently carries no fields.
+#[derive(Debug, Clone)]
+struct NetlinkSockMetadata {}
+impl NetlinkSockMetadata {
+    /// Creates the (empty) metadata value.
+    fn new() -> NetlinkSockMetadata {
+        Self {}
+    }
+}
+/// Netlink socket state; implements both the `Socket` and the `NetlinkSocket` trait.
+#[derive(Debug, Clone)]
+#[cast_to([sync] Socket)]
+#[cast_to([sync] NetlinkSocket)]
+pub struct NetlinkSock {
+    // sk: Option<Weak<dyn NetlinkSocket>>,
+    // Port id of this socket; reported as `nl_pid` by `netlink_recv`.
+    portid: u32,
+    node: Arc<HListHead>,
+    dst_portid: u32,
+    dst_group: u32,
+    // Bit set tested by `is_kernel` against `NetlinkFlags`.
+    pub flags: u32,
+    subscriptions: u32,
+    ngroups: u64,
+    groups: Vec<u64>,
+    // Protocol number; index of this socket's slot in `NL_TABLE`.
+    pub protocol: usize,
+    // Whether the socket has been bound; read by `netlink_bind`.
+    bound: bool,
+    state: NetlinkState,
+    max_recvmsg_len: usize,
+    dump_done_errno: i32,
+    cb_running: bool,
+    // Buffers appended by `enqueue_skb`.
+    queue: Vec<Arc<RwLock<SkBuff>>>,
+    // Serialised messages written by `netlink_send` and consumed by `netlink_recv`;
+    // shared between clones because it sits behind an `Arc`.
+    data: Arc<Mutex<Vec<Vec<u8>>>>,
+    // Value returned by `sock_sndtimeo` for blocking sends.
+    sk_sndtimeo: i64,
+    sk_rcvtimeo: i64,
+    callback: Option<&'static dyn NetlinkCallback>,
+}
+impl Socket for NetlinkSock {
+    /// Forwards to [`NetlinkSock::netlink_connect`].
+    fn connect(&self, _endpoint: Endpoint) -> Result<(), SystemError> {
+        self.netlink_connect(_endpoint)
+    }
+    /// Not implemented yet.
+    fn shutdown(&self, _type: ShutdownTemp) -> Result<(), SystemError> {
+        todo!()
+    }
+    /// Binds a copy of this socket to a netlink endpoint.
+    ///
+    /// Returns `EINVAL` for any non-netlink endpoint. The result of `netlink_bind` itself
+    /// is deliberately ignored.
+    fn bind(&self, _endpoint: Endpoint) -> Result<(), SystemError> {
+        log::debug!("NetlinkSock bind to {:?}", _endpoint);
+        let addr = match _endpoint {
+            Endpoint::Netlink(netlinkendpoint) => netlinkendpoint.addr,
+            _ => return Err(SystemError::EINVAL),
+        };
+        let sock: Arc<Mutex<Box<dyn NetlinkSocket>>> =
+            Arc::new(Mutex::new(Box::new(self.clone())));
+        let _ = netlink_bind(sock, &addr);
+        Ok(())
+    }
+    /// Closing needs no work; always succeeds.
+    fn close(&self) -> Result<(), SystemError> {
+        Ok(())
+    }
+    /// Not implemented yet.
+    fn listen(&self, _backlog: usize) -> Result<(), SystemError> {
+        todo!()
+    }
+    /// Not implemented yet.
+    fn accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        todo!()
+    }
+
+    /// Not implemented yet.
+    fn wait_queue(&self) -> &WaitQueue {
+        todo!()
+    }
+
+    /// Not implemented yet.
+    fn poll(&self) -> usize {
+        todo!()
+    }
+    // The `send_to` interface is borrowed to model netlink_sendmsg.
+    fn send_to(
+        &self,
+        buffer: &[u8],
+        flags: MessageFlag,
+        address: Endpoint,
+    ) -> Result<usize, SystemError> {
+        log::debug!("NetlinkSock send_to");
+        self.netlink_send(buffer, address)
+    }
+    // The `recv_from` interface is borrowed to model netlink_recvmsg.
+    fn recv_from(
+        &self,
+        msg: &mut [u8],
+        flags: MessageFlag,
+        address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        log::debug!("NetlinkSock recv_from");
+        self.netlink_recv(msg, flags)
+    }
+    /// Placeholder: always reports a send buffer size of 0.
+    fn send_buffer_size(&self) -> usize {
+        log::warn!("send_buffer_size is implemented to 0");
+        0
+    }
+    /// Placeholder: always reports a receive buffer size of 0.
+    fn recv_buffer_size(&self) -> usize {
+        log::warn!("recv_buffer_size is implemented to 0");
+        0
+    }
+}
+/// Minimal `IndexNode` implementation; every method here is a stub.
+impl IndexNode for NetlinkSock {
+    /// Stub: reads nothing and reports 0 bytes.
+    fn read_at(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &mut [u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        // Nothing is copied into `buf`.
+        Ok(0)
+    }
+    /// Stub: writes nothing and reports 0 bytes.
+    fn write_at(
+        &self,
+        offset: usize,
+        len: usize,
+        buf: &[u8],
+        _data: SpinLockGuard<FilePrivateData>,
+    ) -> Result<usize, SystemError> {
+        // `buf` is ignored.
+        Ok(0)
+    }
+    /// Not implemented yet.
+    fn fs(&self) -> Arc<dyn FileSystem> {
+        todo!()
+    }
+    /// Returns `self` as `&dyn Any`.
+    fn as_any_ref(&self) -> &dyn Any {
+        self
+    }
+    /// Stub: always returns an empty list.
+    fn list(&self) -> Result<Vec<String>, SystemError> {
+        // No entries are produced.
+        Ok(Vec::new())
+    }
+}
+// TODO: finish the NetlinkSocket trait implementation. `sk_family`, `sk_rmem_alloc`,
+// `sk_rcvbuf` and `flags` are still placeholders.
+impl NetlinkSocket for NetlinkSock {
+    /// Placeholder: always 0.
+    fn sk_family(&self) -> i32 {
+        0
+    }
+    /// Current state of the socket.
+    fn sk_state(&self) -> NetlinkState {
+        self.state
+    }
+    /// Protocol number, i.e. the index of this socket's slot in `NL_TABLE`.
+    fn sk_protocol(&self) -> usize {
+        self.protocol
+    }
+    /// Placeholder: always 0.
+    fn sk_rmem_alloc(&self) -> usize {
+        0
+    }
+    /// Placeholder: always 0.
+    fn sk_rcvbuf(&self) -> usize {
+        0
+    }
+    /// Appends `skb` to the receive queue.
+    fn enqueue_skb(&mut self, skb: Arc<RwLock<SkBuff>>) {
+        self.queue.push(skb);
+    }
+    /// Whether the `NETLINK_F_KERNEL_SOCKET` bit is set in `flags`.
+    fn is_kernel(&self) -> bool {
+        self.flags & NetlinkFlags::NETLINK_F_KERNEL_SOCKET.bits() != 0
+    }
+    /// Two sockets are considered equal when their port ids match.
+    ///
+    /// Returns `false` (instead of panicking) when `other` is not a `NetlinkSock`.
+    fn equals(&self, other: Arc<Mutex<Box<dyn NetlinkSocket>>>) -> bool {
+        let guard = other.lock();
+        let same = guard
+            .deref()
+            .as_any()
+            .downcast_ref::<NetlinkSock>()
+            .map_or(false, |nlk| self.portid == nlk.portid);
+        same
+    }
+    /// Port id of this socket.
+    fn portid(&self) -> u32 {
+        self.portid
+    }
+    /// Number of multicast groups of this socket.
+    fn ngroups(&self) -> u64 {
+        self.ngroups
+    }
+    /// Copy of the group list of this socket.
+    fn groups(&self) -> Vec<u64> {
+        self.groups.clone()
+    }
+    /// Placeholder: always reports `SockFlags::SockDead`.
+    fn flags(&self) -> Option<SockFlags> {
+        Some(SockFlags::SockDead)
+    }
+    /// Send timeout: 0 for non-blocking sends, otherwise the configured `sk_sndtimeo`.
+    fn sock_sndtimeo(&self, noblock: bool) -> i64 {
+        if noblock {
+            0
+        } else {
+            self.sk_sndtimeo
+        }
+    }
+    /// Returns `self` as `&dyn Any` for downcasting.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+}
+impl NetlinkSocketWithCallback for NetlinkSock {
+    /// Stub: `callback` is accepted but not stored or invoked.
+    fn sk_data_ready(&self, callback: impl Fn(i32) -> i32) { /* to be implemented */
+    }
+}
+impl NetlinkSock {
+    /// Size of the metadata buffer.
+    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
+    /// Default size of the receive buffer.
+    pub const DEFAULT_RX_BUF_SIZE: usize = 512 * 1024;
+    /// Default size of the transmit buffer.
+    pub const DEFAULT_TX_BUF_SIZE: usize = 512 * 1024;
+    /// Creates an unbound, unconnected socket with protocol 1, an empty message queue and
+    /// all other fields zeroed or empty.
+    pub fn new() -> NetlinkSock {
+        let vec_of_vec_u8: Vec<Vec<u8>> = Vec::new();
+        let mutex_protected = Mutex::new(vec_of_vec_u8);
+        let data: Arc<Mutex<Vec<Vec<u8>>>> = Arc::new(mutex_protected);
+        NetlinkSock {
+            // sk: None,
+            portid: 0,
+            node: Arc::new(HListHead { first: None }),
+            dst_portid: 0,
+            dst_group: 0,
+            flags: 0,
+            subscriptions: 0,
+            ngroups: 0,
+            groups: Vec::new(),
+            bound: false,
+            state: NetlinkState::NetlinkUnconnected,
+            protocol: 1,
+            max_recvmsg_len: 0,
+            dump_done_errno: 0,
+            cb_running: false,
+            queue: Vec::new(),
+            data,
+            sk_sndtimeo: 0,
+            sk_rcvtimeo: 0,
+            callback: None,
+        }
+    }
+    /// Stub: `listener` is dropped without being registered.
+    fn register(&self, listener: Box<dyn NetlinkMessageHandler>) {
+        // Not implemented yet.
+    }
+    /// Stub: `listener` is dropped without any effect.
+    fn unregister(&self, listener: Box<dyn NetlinkMessageHandler>) {
+        // Not implemented yet.
+    }
+    // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1078
+    /// Stub: accepts any endpoint and always succeeds.
+    fn netlink_connect(&self, _endpoint: Endpoint) -> Result<(), SystemError> {
+        Ok(())
+    }
+
+    // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1849
+    /// Entry point for a `sendmsg()`-style send on this netlink socket.
+    ///
+    /// Wraps `data` in a new `NLMSG_DONE` header that reuses the sequence number and pid
+    /// of the incoming header, pads the result to a multiple of 4 bytes and stores it as
+    /// the only entry of the socket's message queue.
+    /// ## Parameters
+    /// - data    - the netlink message handed in by the sender, starting with an `NLmsghdr`
+    /// - address - destination endpoint (currently unused)
+    /// ## Returns
+    /// `data.len()` on success.
+    /// ## Errors
+    /// - `EINVAL`  - `data` is shorter than a netlink message header
+    /// - `ENAVAIL` - the header announces more bytes than `data` contains
+    fn netlink_send(&self, data: &[u8], address: Endpoint) -> Result<usize, SystemError> {
+        log::info!("netlink_send: data: {:?}", data);
+        // A valid netlink message contains at least a message header.
+        if data.len() < size_of::<NLmsghdr>() {
+            log::warn!("netlink_send: data too short, len: {}", data.len());
+            return Err(SystemError::EINVAL);
+        }
+        // SAFETY: `data` holds at least `size_of::<NLmsghdr>()` readable bytes (checked
+        // above). The header is copied out with `read_unaligned` because a byte slice
+        // gives no alignment guarantee, so creating a `&NLmsghdr` into it would be
+        // undefined behaviour.
+        #[allow(unsafe_code)]
+        let header = unsafe { core::ptr::read_unaligned(data.as_ptr() as *const NLmsghdr) };
+        if header.nlmsg_len as usize > data.len() {
+            log::warn!(
+                "netlink_send: data too short, nlmsg_len: {}",
+                header.nlmsg_len
+            );
+            return Err(SystemError::ENAVAIL);
+        }
+        // let message_type = NLmsgType::from(header.nlmsg_type);
+        let mut buffer = self.data.lock();
+        // Only the most recent message is kept.
+        buffer.clear();
+
+        let mut msg = Vec::new();
+        let new_header = NLmsghdr {
+            nlmsg_len: 0, // to be determined later
+            nlmsg_type: NLmsgType::NLMSG_DONE.into(),
+            nlmsg_flags: NLmsgFlags::NLM_F_MULTI,
+            nlmsg_seq: header.nlmsg_seq,
+            nlmsg_pid: header.nlmsg_pid,
+        };
+        // Serialise the new header into `msg`.
+        msg.push_ext(new_header);
+        // Append the message body.
+        msg.extend_from_slice(data);
+        // Pad `msg` to a multiple of 4 bytes.
+        msg.align4();
+        // Store the final length at the start of `msg`.
+        msg.set_ext(0, msg.len() as u32);
+        // Queue the serialised message.
+        buffer.push(msg);
+        Ok(data.len())
+    }
+
+    // https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1938
+    /// Entry point for a `recvmsg()`-style receive on this netlink socket.
+    ///
+    /// Removes the oldest queued message and copies as much of it as fits into `msg`.
+    /// ## Parameters
+    /// - msg     - destination buffer for the received netlink message
+    /// - flags   - flags of this receive operation (mostly coming from user space)
+    /// ## Returns
+    /// The number of bytes copied and a netlink endpoint carrying this socket's port id.
+    /// ## Errors
+    /// - `EOPNOTSUPP_OR_ENOTSUP` - out-of-band data was requested
+    /// - `EAGAIN_OR_EWOULDBLOCK` - no message is queued
+    fn netlink_recv(
+        &self,
+        msg: &mut [u8],
+        flags: MessageFlag,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        // Out-of-band messages are not supported. Reject them before touching the queue
+        // so that no queued message is lost.
+        if flags == MessageFlag::OOB {
+            log::warn!("netlink_recv: OOB message is not supported");
+            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+        }
+
+        let mut buffer = self.data.lock();
+        // `Vec::remove(0)` panics on an empty queue, so report "no data" instead.
+        if buffer.is_empty() {
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+        let msg_kernel = buffer.remove(0);
+
+        // Copy no more than the queued message holds and no more than `msg` can take;
+        // an empty queued message therefore copies 0 bytes.
+        let copied = msg_kernel.len().min(msg.len());
+        msg[..copied].copy_from_slice(&msg_kernel[..copied]);
+
+        let endpoint = Endpoint::Netlink(NetlinkEndpoint {
+            addr: SockAddrNl {
+                nl_family: AddressFamily::Netlink,
+                nl_pad: 0,
+                nl_pid: self.portid,
+                nl_groups: 0,
+            },
+        });
+
+        // Report the number of copied bytes together with the endpoint.
+        log::debug!("netlink_recv: copied: {}, endpoint: {:?}", copied, endpoint);
+        Ok((copied, endpoint))
+    }
+}
+
+/// Listener masks of one netlink protocol; `netlink_has_listeners` reads the entry at
+/// index `group - 1`.
+#[derive(Clone)]
+pub struct Listeners {
+    // Recursive Wakeup Unlocking?
+    masks: Vec<u64>,
+}
+impl Listeners {
+    /// Creates a listener set without any mask.
+    pub fn new() -> Listeners {
+        Listeners { masks: Vec::new() }
+    }
+    /// Returns a copy of the stored masks.
+    fn masks(&self) -> Vec<u64> {
+        self.masks.clone()
+    }
+}
+
+/// Builds the initial global table: `MAX_LINKS` empty `NetlinkTable`s behind a read-write lock.
+fn initialize_netlink_table() -> RwLock<Vec<NetlinkTable>> {
+    let tables: Vec<NetlinkTable> = (0..MAX_LINKS).map(|_| NetlinkTable::new()).collect();
+    RwLock::new(tables)
+}
+
+lazy_static! {
+    /// Global vector of `NetlinkTable`s; each element stands for one netlink protocol type
+    /// and there are at most `MAX_LINKS` of them.
+    pub static ref NL_TABLE: RwLock<Vec<NetlinkTable>> = initialize_netlink_table();
+}
+
+/// Returns the listener mask recorded for `group` in the protocol table of `sk`.
+///
+/// Yields 0, after logging an error, when the protocol of `sk` has no slot in `NL_TABLE`,
+/// when that slot has no listeners, or when `group` is 0 or beyond the recorded masks.
+/// Otherwise the mask at index `group - 1` is returned, cast to `i32`.
+pub fn netlink_has_listeners(sk: &NetlinkSock, group: u32) -> i32 {
+    log::info!("netlink_has_listeners");
+    let protocol = sk.sk_protocol();
+
+    // Take the read lock on the global table.
+    let nl_table = NL_TABLE.read();
+
+    // The protocol must have a slot in the table.
+    if protocol >= nl_table.len() {
+        log::error!(
+            "Protocol {} is out of bounds, table's len is {}",
+            protocol,
+            nl_table.len()
+        );
+        return 0;
+    }
+
+    // The slot must carry listeners.
+    let listeners = match &nl_table[protocol].listeners {
+        Some(listeners) => listeners,
+        None => {
+            log::error!("Listeners for protocol {} are None", protocol);
+            return 0;
+        }
+    };
+
+    // Groups are 1-based; group 0 and groups past the end are out of bounds.
+    let res = match (group as usize)
+        .checked_sub(1)
+        .and_then(|slot| listeners.masks.get(slot))
+    {
+        Some(mask) => *mask as i32,
+        None => {
+            log::error!("Group {} is out of bounds", group);
+            0
+        }
+    };
+    res
+}
+/// Bookkeeping block of one multicast send, shared by all `do_one_broadcast` calls of a
+/// `netlink_broadcast`.
+struct NetlinkBroadcastData<'a> {
+    // Sending socket; it is skipped during delivery.
+    exclude_sk: &'a Arc<dyn NetlinkSocket>,
+    // net: &'a Net,
+    // Destination unicast port id; a socket with this port id is skipped.
+    portid: u32,
+    // Destination multicast group.
+    group: u64,
+    // Non-zero once preparing the per-receiver copy has failed.
+    failure: i32,
+    // Non-zero when a delivery failure must be reported to the sender.
+    delivery_failure: i32,
+    // OR of the congestion results returned by `netlink_broadcast_deliver`.
+    congested: i32,
+    // Non-zero once at least one socket received the message.
+    delivered: i32,
+    allocation: u32,
+    // Buffer owned by the sender.
+    skb: Arc<RwLock<SkBuff>>,
+    // Copy of `skb` that is handed to the receiving socket.
+    skb_2: Arc<RwLock<SkBuff>>,
+}
+impl<'a> NetlinkBroadcastData<'a> {
+    /// Overwrites the contents of `skb_2` with a clone of the contents of `skb`.
+    pub fn copy_skb_to_skb_2(&mut self) {
+        let snapshot = self.skb.read().clone();
+        *self.skb_2.write() = snapshot;
+    }
+}
+/// Tries to deliver a multicast message to one user-process netlink socket.
+/// ## Parameters:
+/// - sk: the candidate receiving socket
+/// - info: bookkeeping block of the multicast message being sent
+/// ## Returns:
+/// `Ok(())` once a delivery has been attempted (even if it failed), or
+/// `Err(SystemError::EINVAL)` when the socket is skipped or the message could not be
+/// prepared.
+/// ## Notes:
+/// The socket is known to use the same netlink protocol as the message and to have
+/// multicast subscriptions enabled; which groups it subscribed to is checked here.
+fn do_one_broadcast(
+    sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    info: &mut Box<NetlinkBroadcastData>,
+) -> Result<(), SystemError> {
+    log::info!("do_one_broadcast");
+    // Obtain the concrete NetlinkSock behind the trait object.
+    let nlk: Arc<NetlinkSock> = Arc::clone(&sk)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)?;
+    // Whether this receiver wants delivery failures reported back to the sender.
+    let report_send_error = nlk.flags & NetlinkFlags::BROADCAST_SEND_ERROR.bits() != 0;
+    // Nothing to do when source and destination are the same socket.
+    if info.exclude_sk.equals(sk.clone()) {
+        return Err(SystemError::EINVAL);
+    }
+    // Skip the socket when it is the unicast destination, when the group is invalid
+    // (0, which would underflow `group - 1`, or above the socket's limit), or when the
+    // socket has not subscribed to this group.
+    // NOTE(review): `groups()` is searched as a list of group indices here, not as a
+    // bitmap — confirm the intended representation.
+    if nlk.portid() == info.portid
+        || info.group == 0
+        || info.group > nlk.ngroups()
+        || !nlk.groups().contains(&(info.group - 1))
+    {
+        return Err(SystemError::EINVAL);
+    }
+    // TODO: needs net namespace support
+    // if !net_eq(sock_net(sk), info.net) {
+    //     if !(nlk.flags & NetlinkFlags::LISTEN_ALL_NSID.bits()) {
+    //         return;
+    //     }
+    //     if !peernet_has_id(sock_net(sk), info.net) {
+    //         return;
+    //     }
+    //     if !file_ns_capable(sk.sk_socket.file, info.net.user_ns, CAP_NET_BROADCAST) {
+    //         return;
+    //     }
+    // }
+
+    // An earlier failure marks this socket as overrun as well.
+    if info.failure != 0 {
+        netlink_overrun(&sk);
+        return Err(SystemError::EINVAL);
+    }
+    // Prepare skb_2 from skb.
+    if info.skb_2.read().is_empty() {
+        if skb_shared(&info.skb) {
+            info.copy_skb_to_skb_2();
+        } else {
+            info.skb_2 = Arc::new(RwLock::new(info.skb.read().clone()));
+            skb_orphan(&info.skb_2);
+        }
+    }
+    // If skb_2 is still empty the clone above failed.
+    if info.skb_2.read().is_empty() {
+        netlink_overrun(&sk);
+        info.failure = 1;
+        if report_send_error {
+            info.delivery_failure = 1;
+        }
+        return Err(SystemError::EINVAL);
+    }
+    if sk_filter(&sk, &info.skb_2) {
+        return Err(SystemError::EINVAL);
+    }
+    // TODO: needs net namespace support
+    // peernet2id looks up the id of the peer network associated with the given network
+    // NETLINK_CB(info.skb_2).nsid = peernet2id(sock_net(sk), info.net);
+    // if NETLINK_CB(info.skb_2).nsid != NETNSA_NSID_NOT_ASSIGNED {
+    //     NETLINK_CB(info.skb_2).nsid_is_set = true;
+    // }
+    let ret = netlink_broadcast_deliver(Arc::clone(&sk), &info.skb_2);
+    // Delivering the skb to this socket failed.
+    if ret < 0 {
+        netlink_overrun(&sk);
+        if report_send_error {
+            info.delivery_failure = 1;
+        }
+    } else {
+        info.congested |= ret;
+        info.delivered = 1;
+        info.skb_2 = Arc::new(RwLock::new(info.skb.read().clone()));
+    }
+    drop(sk);
+    log::info!("do_one_broadcast success");
+    Ok(())
+}
+/// Sends a netlink multicast message.
+/// ## Parameters
+/// - ssk: source socket
+/// - skb: the sender's skb carrying the netlink message
+/// - portid: destination unicast address
+/// - group: destination multicast address
+/// - allocation: stored in the bookkeeping block; not otherwise used here
+///
+/// ## Returns
+/// - `Ok(())` when at least one socket received the message
+/// - `Err(ENOBUFS)` when a delivery failure was recorded
+/// - `Err(ESRCH)` when no socket received the message
+/// - the error of `Syscall::do_sched_yield` when yielding after a congested delivery fails
+///
+/// ## Notes: this function is reached in both of these cases:
+///  [1]. user process   --multicast--> user process
+///  [2]. kernel         --multicast--> user process
+///
+pub fn netlink_broadcast<'a>(
+    ssk: &'a Arc<dyn NetlinkSocket>,
+    skb: Arc<RwLock<SkBuff>>,
+    portid: u32,
+    group: u64,
+    allocation: u32,
+) -> Result<(), SystemError> {
+    log::info!("netlink_broadcast");
+    // TODO: needs net namespace support
+    // let net = sock_net(ssk);
+    let mut info = Box::new(NetlinkBroadcastData {
+        exclude_sk: ssk,
+        // net,
+        portid,
+        group,
+        failure: 0,
+        delivery_failure: 0,
+        congested: 0,
+        delivered: 0,
+        allocation,
+        skb,
+        skb_2: Arc::new(RwLock::new(SkBuff::new())),
+    });
+
+    // While we sleep in clone, do not allow to change socket list
+    let nl_table = NL_TABLE.read();
+    // Walk every socket on the multicast list of the sender's protocol and try to deliver
+    // the message to it; per-socket errors are ignored on purpose.
+    for sk in &mut nl_table[ssk.sk_protocol()].mc_list.iter() {
+        let _ = do_one_broadcast(Arc::clone(sk), &mut info);
+    }
+
+    // Release the sender's buffer; the counters in `info` stay usable afterwards.
+    drop(info.skb);
+
+    if info.delivery_failure != 0 {
+        return Err(SystemError::ENOBUFS);
+    }
+    drop(info.skb_2);
+
+    if info.delivered != 0 {
+        // Give receivers a chance to run when one of them reported congestion.
+        if info.congested != 0 {
+            Syscall::do_sched_yield()?;
+        }
+        return Ok(());
+    }
+    return Err(SystemError::ESRCH);
+}
+
+/// Filters a packet (`skb`) for a socket (`sk`).
+///
+/// Stub: always returns `false`; `do_one_broadcast` aborts the delivery only on `true`.
+fn sk_filter(sk: &Arc<Mutex<Box<dyn NetlinkSocket>>>, skb: &Arc<RwLock<SkBuff>>) -> bool {
+    // TODO: Implementation of the function
+    false
+}
+
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c?fi=netlink_has_listeners#1400
+/// Delivers a broadcast message to one netlink socket.
+/// - Sends the skb carrying the netlink multicast message to the given destination socket
+///
+/// ## Parameters
+/// - sk: the destination user-process netlink socket
+/// - skb: the network buffer carrying the netlink multicast message
+///
+/// ## Returns
+///  - -1: the socket does not meet the receive conditions
+///  - 0: delivered; the socket's unprocessed data is at most half of its receive buffer
+///  - 1: delivered; the socket's unprocessed data exceeds half of its receive buffer
+///    (the socket appears to be congested)
+///
+/// ## Notes:
+/// - At this point the socket is known to match the multicast message;
+/// - netlink multicast messages never block.
+/// - The socket is locked once per snapshot and never across `netlink_sendskb`, which
+///   takes the same lock itself.
+fn netlink_broadcast_deliver(
+    sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    skb: &Arc<RwLock<SkBuff>>,
+) -> i32 {
+    log::info!("netlink_broadcast_deliver");
+    // Take one consistent snapshot under a single lock. Locking twice inside the same
+    // expression keeps both guards alive together and would block on the socket's own
+    // mutex.
+    let (rmem_alloc, rcvbuf, state) = {
+        let guard = sk.lock();
+        (guard.sk_rmem_alloc(), guard.sk_rcvbuf(), guard.sk_state())
+    };
+    // Refuse delivery when the receive memory already exceeds the buffer size or the
+    // socket is marked as congested.
+    if rmem_alloc > rcvbuf || state == NetlinkState::NETLINK_S_CONGESTED {
+        return -1;
+    }
+    // The receive conditions hold: make this socket the owner of the skb.
+    netlink_skb_set_owner_r(skb, sk.clone());
+    // Send the skb to the socket, i.e. append it to the socket's receive queue.
+    let _ = netlink_sendskb(sk.clone(), skb);
+    // Report congestion when the unprocessed data exceeds half of the receive buffer.
+    let (rmem_alloc, rcvbuf) = {
+        let guard = sk.lock();
+        (guard.sk_rmem_alloc(), guard.sk_rcvbuf())
+    };
+    if rmem_alloc > (rcvbuf >> 1) {
+        1
+    } else {
+        0
+    }
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c?fi=netlink_has_listeners#387
+/// Records `sk` as the owning socket of the buffer `skb`.
+fn netlink_skb_set_owner_r(skb: &Arc<RwLock<SkBuff>>, sk: Arc<Mutex<Box<dyn NetlinkSocket>>>) {
+    // Steps of the Linux original that are not ported yet:
+    //   WARN_ON(skb->sk != NULL);
+    //   skb->destructor = netlink_skb_destructor;
+    //   atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+    //   sk_mem_charge(sk, skb->truesize);
+    skb.write().sk = sk;
+}
+/// Thin wrapper that owns a shared handle to a netlink socket.
+pub struct NetlinkSocketWrapper {
+    /// The wrapped socket.
+    sk: Arc<dyn NetlinkSocket>,
+}
+impl NetlinkSocketWrapper {
+    /// Wraps `sk` without any further processing.
+    pub fn new(sk: Arc<dyn NetlinkSocket>) -> Self {
+        Self { sk }
+    }
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c?fi=netlink_has_listeners#1268
+/// Queues the buffer `skb` on the destination socket `sk` and signals that data is ready.
+///
+/// Returns the `len` field of the skb, read before it is queued.
+fn netlink_sendskb(sk: Arc<Mutex<Box<dyn NetlinkSocket>>>, skb: &Arc<RwLock<SkBuff>>) -> u32 {
+    let queued_len = skb.read().len;
+    // Append the skb to the tail of the socket's receive queue.
+    sk.lock().enqueue_skb(Arc::clone(skb));
+    // Run the data-ready notification so that readers learn about the new data.
+    // NOTE(review): other functions in this file downcast the same handle to
+    // `Arc<RwLock<NetlinkSock>>`; confirm which target type is the valid one.
+    let target: Arc<NetlinkSock> = Arc::clone(&sk)
+        .arc_any()
+        .downcast()
+        .expect("Invalid downcast to NetlinkSock");
+    sk_data_ready(target);
+    queued_len
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1337
+/// Sends a netlink unicast message.
+///
+/// ## Parameters
+/// - `ssk`: source socket
+/// - `skb`: the sender's buffer carrying the netlink message
+/// - `portid`: destination unicast address
+/// - `nonblock`: `true` for a non-blocking call, `false` for a blocking call
+///
+/// ## Returns
+/// The value returned by `netlink_sendskb` / `netlink_unicast_kernel` on success.
+///
+/// ## Errors
+/// - any error from `netlink_getsockbyportid` (destination lookup failed)
+/// - `EINVAL` if `sk_filter` rejects the message
+/// - any error from `netlink_attachskb`, e.g. `EAGAIN_OR_EWOULDBLOCK` when the
+///   destination is congested and the call is non-blocking. This is propagated with `?`
+///   rather than unwrapped, so a congested receiver no longer panics the kernel.
+fn netlink_unicast(
+    ssk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    skb: Arc<RwLock<SkBuff>>,
+    portid: u32,
+    nonblock: bool,
+) -> Result<u32, SystemError> {
+    // todo: trim the skb to its actual size
+    // skb = netlink_trim(skb, gfp_any());
+    // Send timeout for this call, as computed by the source socket from `nonblock`.
+    let timeo: i64 = ssk.lock().sock_sndtimeo(nonblock);
+    loop {
+        // Resolve the destination socket from the source socket and the unicast address.
+        let sk = netlink_getsockbyportid(ssk.clone(), portid)?;
+
+        // Kernel-side destination: hand the message straight to its receive callback.
+        if sk.lock().is_kernel() {
+            return Ok(netlink_unicast_kernel(sk, ssk, skb));
+        }
+
+        if sk_filter(&sk, &skb) {
+            return Err(SystemError::EINVAL);
+        }
+
+        match netlink_attachskb(sk.clone(), skb.clone(), timeo, ssk.clone())? {
+            // The skb was attached; queue it on the destination.
+            0 => return Ok(netlink_sendskb(sk, &skb)),
+            // The destination had to be waited for; look it up again and retry.
+            1 => continue,
+            _ => return Err(SystemError::EAGAIN_OR_EWOULDBLOCK),
+        }
+    }
+}
+
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#1316
+/// Unicasts a netlink message from a user process to a kernel netlink socket.
+///
+/// ## Parameters
+/// - `sk`: destination (kernel) socket
+/// - `ssk`: source socket (currently unused)
+/// - `skb`: the sender's buffer carrying the netlink message
+///
+/// ## Returns
+/// The skb length when the destination has a receive callback registered,
+/// otherwise `111` (the numeric value of ECONNREFUSED).
+///
+/// ## Notes
+/// - Ownership of the skb is transferred to `sk` when a callback is present.
+fn netlink_unicast_kernel(
+    sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    ssk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    skb: Arc<RwLock<SkBuff>>,
+) -> u32 {
+    let nlk: Arc<RwLock<NetlinkSock>> = Arc::clone(&sk)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)
+        .expect("Invalid downcast to LockedNetlinkSock");
+    let nlk_guard = nlk.read();
+    // Does the kernel socket have a netlink_rcv callback (the `input` function that
+    // protocols usually pass when creating their kernel socket)?
+    let ret = if let Some(callback) = nlk_guard.callback.as_ref() {
+        let len = skb.read().len;
+        netlink_skb_set_owner_r(&skb, sk);
+        // todo: netlink_deliver_tap_kernel(sk, ssk, skb);
+        callback.netlink_rcv(skb.clone());
+        len
+    } else {
+        // No callback registered: the message is simply discarded.
+        // ECONNREFUSED = 111
+        111
+    };
+    drop(skb);
+    ret
+}
+// https://code.dragonos.org.cn/s?refs=netlink_attachskb&project=linux-6.1.9
+/// Binds `skb` to the user-process netlink socket `sk`.
+///
+/// ## Parameters
+/// - `sk`: destination socket
+/// - `skb`: buffer to attach
+/// - `timeo`: timeout; `0` means the call is non-blocking
+/// - `ssk`: source socket
+///
+/// ## Returns
+/// - `Ok(0)`: the skb is now owned by `sk` and may be queued
+/// - `Ok(1)`: the destination was busy; the caller should look it up again and retry
+/// - `Err(EAGAIN_OR_EWOULDBLOCK)`: the destination is busy and the call is non-blocking
+/// - `Err(EINVAL)`: `sk` is not a `NetlinkSock`
+/// - any error returned by `schedule_timeout`
+///
+/// ## Notes
+/// - The socket mutex is locked once per check. Calling `sk.lock()` several times inside
+///   one condition keeps the first guard alive while the next `lock()` runs, which
+///   self-deadlocks on a non-reentrant mutex.
+/// - NOTE(review): the `nlk` read guard is still held across `schedule_timeout`;
+///   confirm this is acceptable for the lock implementation in use.
+fn netlink_attachskb(
+    sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    skb: Arc<RwLock<SkBuff>>,
+    mut timeo: i64,
+    ssk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+) -> Result<u64, SystemError> {
+    let nlk: Arc<RwLock<NetlinkSock>> = Arc::clone(&sk)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)?;
+    let nlk_guard = nlk.read();
+
+    // True while the unread data exceeds the receive buffer size.
+    // Takes the socket lock exactly once and releases it before returning.
+    let rcvbuf_exceeded = || {
+        let sk_guard = sk.lock();
+        let exceeded = sk_guard.sk_rmem_alloc() > sk_guard.sk_rcvbuf();
+        exceeded
+    };
+
+    // If the destination already holds more unread data than its receive buffer allows,
+    // or it is flagged as congested, the skb cannot be accepted right now and the
+    // sender has to wait.
+    if rcvbuf_exceeded() || nlk_guard.state == NetlinkState::NETLINK_S_CONGESTED {
+        // Wait queue placeholder; not wired up yet (see the todo below).
+        let _wq = WaitQueue::default();
+        // A timeout of 0 means non-blocking: drop the message and report EAGAIN.
+        if timeo == 0 {
+            // A message originating from the kernel marks the destination as overrun.
+            // (The source socket is always present here, so only the kernel check remains.)
+            if ssk.lock().is_kernel() {
+                netlink_overrun(&sk);
+            }
+            drop(skb);
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+        // From here on the call is blocking.
+        // __set_current_state(TASK_INTERRUPTIBLE);
+        // todo: add the destination socket to the wait queue
+        // add_wait_queue(&nlk_guard.wait, &wait);
+
+        // If the receive condition still does not hold and the socket is not dead,
+        // sleep for the remaining timeout.
+        let still_blocked =
+            rcvbuf_exceeded() || nlk_guard.state == NetlinkState::NETLINK_S_CONGESTED;
+        // todo: sock_flag
+        let sock_dead = sk.lock().flags() == Some(SockFlags::SockDead);
+        if still_blocked && !sock_dead {
+            // The remaining time is not reported back to the caller yet.
+            timeo = schedule_timeout(timeo)?;
+        }
+        // __set_current_state(TASK_RUNNING);
+        // remove_wait_queue(&nlk_guard.wait, &wait);
+
+        // todo: handle a signal received while waiting
+        // if (signal_pending(current)) {
+        // 	drop(skb);
+        // 	return sock_intr_errno(*timeo);
+        // }
+        return Ok(1);
+    }
+    netlink_skb_set_owner_r(&skb, sk);
+    Ok(0)
+}
+
+/// Resolves the destination socket for a unicast from `ssk` to `portid`.
+///
+/// ## Errors
+/// - `EINVAL` if either socket is not a `NetlinkSock`
+/// - `ECONNREFUSED` if the destination is connected to a peer other than `ssk`
+///
+/// ## Notes
+/// - Mirrors Linux `netlink_getsockbyportid`, which refuses delivery only when the
+///   destination is in the *connected* state and its `dst_portid` differs from the
+///   sender's `portid`. An unconnected destination accepts messages from any sender.
+/// - `netlink_lookup` returns the socket directly rather than an `Option`, so there is
+///   no "not found" branch here.
+fn netlink_getsockbyportid(
+    ssk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    portid: u32,
+) -> Result<Arc<Mutex<Box<dyn NetlinkSocket>>>, SystemError> {
+    let sock: Arc<Mutex<Box<dyn NetlinkSocket>>> = netlink_lookup(ssk.lock().sk_protocol(), portid);
+
+    /* Don't bother queuing skb if kernel socket has no input function */
+    let nlk_sock: Arc<RwLock<NetlinkSock>> = Arc::clone(&sock)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)?;
+    let nlk_ssk: Arc<RwLock<NetlinkSock>> = Arc::clone(&ssk)
+        .arc_any()
+        .downcast()
+        .map_err(|_| SystemError::EINVAL)?;
+
+    /* dst_portid and sk_state can be changed in netlink_connect() */
+    let peer_mismatch = nlk_sock.read().dst_portid != nlk_ssk.read().portid;
+    if sock.lock().sk_state() == NetlinkState::NetlinkConnected && peer_mismatch {
+        return Err(SystemError::ECONNREFUSED);
+    }
+    Ok(sock)
+}

+ 9 - 0
kernel/src/net/socket/netlink/callback.rs

@@ -0,0 +1,9 @@
+use super::skbuff::SkBuff;
+use crate::libs::rwlock::RwLock;
+use alloc::sync::Arc;
+use core::fmt::Debug;
+/// Receive hook for a netlink socket.
+pub trait NetlinkCallback: Send + Sync + Debug {
+    /// Callback invoked when a netlink packet (`skb`) is received.
+    fn netlink_rcv(&self, skb: Arc<RwLock<SkBuff>>) -> i32;
+}
+/// Empty placeholder type; it has no fields and no impls in this file.
+struct NetlinkCallbackData {}

+ 10 - 0
kernel/src/net/socket/netlink/endpoint.rs

@@ -0,0 +1,10 @@
+use crate::net::syscall::SockAddrNl;
+/// Endpoint of a netlink socket, identified by its netlink socket address.
+#[derive(Debug, Clone)]
+pub struct NetlinkEndpoint {
+    /// The netlink socket address of this endpoint.
+    pub addr: SockAddrNl,
+}
+impl NetlinkEndpoint {
+    /// Builds an endpoint from the given netlink address.
+    pub fn new(addr: SockAddrNl) -> Self {
+        Self { addr }
+    }
+}

+ 44 - 0
kernel/src/net/socket/netlink/mod.rs

@@ -0,0 +1,44 @@
+use alloc::sync::Arc;
+use netlink::NETLINK_KOBJECT_UEVENT;
+use system_error::SystemError;
+
+use crate::driver::base::uevent::KobjUeventEnv;
+
+use super::{family, inet::datagram, Inode, Socket, Type};
+
+//https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/
+/*
+..		-	-
+Kconfig
+Makefile
+af_netlink.c
+af_netlink.h
+diag.c  Netlink 套接字的诊断功能,主要用于查询内核中存在的 Netlink 套接字信息
+genetlink.c
+policy.c
+*/
+// Top-level module defining the public API for Netlink
+pub mod af_netlink;
+pub mod callback;
+pub mod endpoint;
+pub mod netlink;
+pub mod netlink_proto;
+pub mod skbuff;
+pub mod sock;
+
+/// Marker type for the netlink socket family.
+pub struct Netlink;
+
+impl family::Family for Netlink {
+    /// Entry point used when user space creates a new netlink socket.
+    ///
+    /// `stype` is currently ignored; only `_protocol` selects the implementation.
+    fn socket(stype: Type, _protocol: u32) -> Result<Arc<Inode>, SystemError> {
+        create_netlink_socket(_protocol).map(|socket| Inode::new(socket))
+    }
+}
+/// Creates a new netlink socket on behalf of user space.
+///
+/// Only `NETLINK_KOBJECT_UEVENT` is supported; every other protocol number yields
+/// `EPROTONOSUPPORT`.
+fn create_netlink_socket(_protocol: u32) -> Result<Arc<dyn Socket>, SystemError> {
+    if _protocol as usize != NETLINK_KOBJECT_UEVENT {
+        return Err(SystemError::EPROTONOSUPPORT);
+    }
+    Ok(Arc::new(af_netlink::NetlinkSock::new()))
+}

+ 319 - 0
kernel/src/net/socket/netlink/netlink.rs

@@ -0,0 +1,319 @@
+use alloc::{
+    boxed::Box,
+    slice,
+    sync::{Arc, Weak},
+    vec::Vec,
+};
+use system_error::SystemError;
+
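+// Defines the Netlink message structures, such as NLmsghdr and geNLmsghdr (the extended
+// netlink message header), plus the helper functions for packing and unpacking messages.
+// Reference: https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h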
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+// Ensure the header is only included once
+use crate::libs::mutex::Mutex;
+use core::mem;
+
+use super::af_netlink::{
+    netlink_insert, Listeners, NetlinkFlags, NetlinkSock, NetlinkSocket, NL_TABLE,
+};
+// Netlink protocol family
+pub const NETLINK_ROUTE: usize = 0;
+pub const NETLINK_UNUSED: usize = 1;
+pub const NETLINK_USERSOCK: usize = 2;
+pub const NETLINK_FIREWALL: usize = 3;
+pub const NETLINK_SOCK_DIAG: usize = 4;
+pub const NETLINK_NFLOG: usize = 5;
+pub const NETLINK_XFRM: usize = 6;
+pub const NETLINK_SELINUX: usize = 7;
+pub const NETLINK_ISCSI: usize = 8;
+pub const NETLINK_AUDIT: usize = 9;
+pub const NETLINK_FIB_LOOKUP: usize = 10;
+pub const NETLINK_CONNECTOR: usize = 11;
+pub const NETLINK_NETFILTER: usize = 12;
+pub const NETLINK_IP6_FW: usize = 13;
+pub const NETLINK_DNRTMSG: usize = 14;
+// Needed by the uevent implementation
+pub const NETLINK_KOBJECT_UEVENT: usize = 15;
+pub const NETLINK_GENERIC: usize = 16;
+// pub const NETLINK_DM : usize = 17; // Assuming DM Events is unused, not defined
+pub const NETLINK_SCSITRANSPORT: usize = 18;
+pub const NETLINK_ECRYPTFS: usize = 19;
+pub const NETLINK_RDMA: usize = 20;
+pub const NETLINK_CRYPTO: usize = 21;
+pub const NETLINK_SMC: usize = 22;
+
+//pub const NETLINK_INET_DIAG = NETLINK_SOCK_DIAG;
+pub const NETLINK_INET_DIAG: usize = 4;
+
+pub const MAX_LINKS: usize = 32;
+
+pub const NL_CFG_F_NONROOT_RECV: u32 = 1 << 0;
+pub const NL_CFG_F_NONROOT_SEND: u32 = 1 << 1;
+
+bitflags! {
+/// The four generic message types carried in `nlmsg_type`.
+///
+/// NOTE(review): these are sequential values rather than independent bits
+/// (`NLMSG_DONE` 0x3 == `NLMSG_NOOP` 0x1 | `NLMSG_ERROR` 0x2), so as a bit-flag set
+/// `NLMSG_DONE` cannot be told apart from `NLMSG_NOOP | NLMSG_ERROR`.
+pub struct NLmsgType: u8 {
+    /* Nothing.     */
+    const NLMSG_NOOP = 0x1;
+    /* Error       */
+    const NLMSG_ERROR = 0x2;
+    /* End of a dump    */
+    const NLMSG_DONE = 0x3;
+    /* Data lost     */
+    const NLMSG_OVERRUN = 0x4;
+}
+
+// Message flags carried in `nlmsg_flags`:
+//  const NLM_F_REQUEST = 1; /* It is request message.     */
+//  const NLM_F_MULTI = 2; /* Multipart message, terminated by NLMSG_DONE */
+//  const NLM_F_ACK = 4; /* Reply with ack, with zero or error code */
+//  const NLM_F_ECHO = 8; /* Echo this request         */
+//  const NLM_F_DUMP_INTR = 16; /* Dump was inconsistent due to sequence change */
+//
+// The modifier groups below (GET / NEW / DELETE / ACK) deliberately reuse the same bit
+// values; which meaning applies depends on the kind of request the flags belong to.
+pub struct NLmsgFlags: u16 {
+    /* Flags values */
+    const NLM_F_REQUEST = 0x01;
+    const NLM_F_MULTI = 0x02;
+    const NLM_F_ACK = 0x04;
+    const NLM_F_ECHO = 0x08;
+    const NLM_F_DUMP_INTR = 0x10;
+    const NLM_F_DUMP_FILTERED = 0x20;
+
+    /* Modifiers to GET request */
+    const NLM_F_ROOT = 0x100; /* specify tree root    */
+    const NLM_F_MATCH = 0x200; /* return all matching    */
+    const NLM_F_ATOMIC = 0x400; /* atomic GET        */
+    //const NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH;
+    const NLM_F_DUMP = 0x100 | 0x200;
+
+    /* Modifiers to NEW request */
+    const NLM_F_REPLACE = 0x100; /* Override existing        */
+    const NLM_F_EXCL = 0x200; /* Do not touch, if it exists    */
+    const NLM_F_CREATE = 0x400; /* Create, if it does not exist    */
+    const NLM_F_APPEND = 0x800; /* Add to end of list        */
+
+    /* Modifiers to DELETE request */
+    const NLM_F_NONREC = 0x100;	/* Do not delete recursively	*/
+
+     /* Flags for ACK message */
+    const NLM_F_CAPPED = 0x100;	/* request was capped */
+    const NLM_F_ACK_TLVS = 0x200;	/* extended ACK TVLs were included */
+}
+}
+/// Netlink message header.
+///
+/// NOTE(review): the struct has no `#[repr(C)]` and uses `usize` / `u8` for the length
+/// and type fields, whereas Linux's `struct nlmsghdr` uses `__u32` / `__u16`. Confirm
+/// whether this layout is ever meant to match the wire format, since the `nlmsg_*`
+/// helpers in this file do pointer arithmetic based on its size.
+/**
+ * struct NLmsghdr - fixed format metadata header of Netlink messages
+ * @nlmsg_len:   Length of message including header
+ * @nlmsg_type:  Message content type
+ * @nlmsg_flags: Additional flags
+ * @nlmsg_seq:   Sequence number
+ * @nlmsg_pid:   Sending process port ID
+ */
+pub struct NLmsghdr {
+    pub nlmsg_len: usize,
+    pub nlmsg_type: NLmsgType,
+    pub nlmsg_flags: NLmsgFlags,
+    pub nlmsg_seq: u32,
+    pub nlmsg_pid: u32,
+}
+
+const NLMSG_ALIGNTO: usize = 4;
+/// State of a netlink socket.
+///
+/// Discriminants: `NetlinkUnconnected` = 0, `NetlinkConnected` = 1 (implicit),
+/// `NETLINK_S_CONGESTED` = 2.
+#[derive(Debug, PartialEq, Copy, Clone)]
+pub enum NetlinkState {
+    NetlinkUnconnected = 0,
+    NetlinkConnected,
+    NETLINK_S_CONGESTED = 2,
+}
+
+/// Rounds `len` up to the next multiple of `NLMSG_ALIGNTO`.
+fn nlmsg_align(len: usize) -> usize {
+    let mask = NLMSG_ALIGNTO - 1;
+    (len + mask) & !mask
+}
+
+/// Size of `NLmsghdr`, rounded up to the netlink alignment.
+fn nlmsg_hdrlen() -> usize {
+    let raw_header_size = mem::size_of::<NLmsghdr>();
+    nlmsg_align(raw_header_size)
+}
+
+/// Total message length for a payload of `len` bytes: aligned header plus payload.
+fn nlmsg_length(len: usize) -> usize {
+    let header = nlmsg_hdrlen();
+    len + header
+}
+
+/// Aligned space occupied by a message whose payload is `len` bytes long.
+fn nlmsg_space(len: usize) -> usize {
+    let unaligned = nlmsg_length(len);
+    nlmsg_align(unaligned)
+}
+
+/// Returns a pointer to the byte that follows the (aligned) header of `nlh`.
+///
+/// # Safety
+/// The caller must guarantee that the memory behind `nlh` extends at least
+/// `nlmsg_length(0)` bytes past the start of the header.
+unsafe fn nlmsg_data(nlh: &NLmsghdr) -> *mut u8 {
+    let base = nlh as *const NLmsghdr as *mut u8;
+    base.add(nlmsg_length(0))
+}
+
+/// Returns a pointer to the message header that follows `nlh` in a buffer.
+///
+/// The offset is the aligned `nlmsg_len` of the current message, measured in bytes.
+/// The pointer is therefore advanced through a `*mut u8`; calling `add` directly on
+/// `*mut NLmsghdr` would scale the offset by `size_of::<NLmsghdr>()`.
+///
+/// `len` (the number of bytes remaining in the buffer) is accepted for parity with the
+/// C macro but is not used: it is passed by value, so the remaining length cannot be
+/// reported back to the caller.
+///
+/// # Safety
+/// `nlh` must point to a valid, readable `NLmsghdr`, and the buffer it lives in must
+/// extend at least `nlmsg_align(nlmsg_len)` bytes past `nlh`.
+unsafe fn nlmsg_next(nlh: *mut NLmsghdr, len: usize) -> *mut NLmsghdr {
+    let _ = len;
+    let step = nlmsg_align((*nlh).nlmsg_len);
+    (nlh as *mut u8).add(step) as *mut NLmsghdr
+}
+
+/// Checks that `nlh` describes a complete message within `len` remaining bytes:
+/// both `len` and `nlmsg_len` cover at least a header, and `nlmsg_len` fits in `len`.
+fn nlmsg_ok(nlh: &NLmsghdr, len: usize) -> bool {
+    let min_len = nlmsg_hdrlen();
+    len >= min_len && (min_len..=len).contains(&nlh.nlmsg_len)
+}
+
+/// Number of bytes of `nlh` that remain after the aligned space for `len` payload bytes.
+fn nlmsg_payload(nlh: &NLmsghdr, len: usize) -> usize {
+    let reserved = nlmsg_space(len);
+    nlh.nlmsg_len - reserved
+}
+// 定义类型别名来简化闭包类型的定义
+type InputCallback = Arc<dyn FnMut() + Send + Sync>;
+type BindCallback = Arc<dyn Fn(i32) -> i32 + Send + Sync>;
+type UnbindCallback = Arc<dyn Fn(i32) -> i32 + Send + Sync>;
+type CompareCallback = Arc<dyn Fn(&NetlinkSock) -> bool + Send + Sync>;
+/// Optional parameters for creating a kernel-side netlink socket.
+///
+/// Note that the derived `Default` yields `groups == 0`, whereas
+/// `NetlinkKernelCfg::new()` starts with `groups == 32`.
+#[derive(Default)]
+pub struct NetlinkKernelCfg {
+    pub groups: u32,
+    pub flags: u32,
+    pub input: Option<InputCallback>,
+    pub bind: Option<BindCallback>,
+    pub unbind: Option<UnbindCallback>,
+    pub compare: Option<CompareCallback>,
+}
+
+impl NetlinkKernelCfg {
+    /// Creates a configuration with 32 groups, no flags and no callbacks.
+    pub fn new() -> Self {
+        Self {
+            groups: 32,
+            ..Self::default()
+        }
+    }
+
+    /// Installs the `input` callback.
+    pub fn set_input<F>(&mut self, callback: F)
+    where
+        F: FnMut() + Send + Sync + 'static,
+    {
+        let shared: InputCallback = Arc::new(callback);
+        self.input = Some(shared);
+    }
+
+    /// Installs the `bind` callback.
+    pub fn set_bind<F>(&mut self, callback: F)
+    where
+        F: Fn(i32) -> i32 + Send + Sync + 'static,
+    {
+        let shared: BindCallback = Arc::new(callback);
+        self.bind = Some(shared);
+    }
+
+    /// Installs the `unbind` callback.
+    pub fn set_unbind<F>(&mut self, callback: F)
+    where
+        F: Fn(i32) -> i32 + Send + Sync + 'static,
+    {
+        let shared: UnbindCallback = Arc::new(callback);
+        self.unbind = Some(shared);
+    }
+
+    /// Installs the `compare` callback.
+    pub fn set_compare<F>(&mut self, callback: F)
+    where
+        F: Fn(&NetlinkSock) -> bool + Send + Sync + 'static,
+    {
+        let shared: CompareCallback = Arc::new(callback);
+        self.compare = Some(shared);
+    }
+}
+//https://code.dragonos.org.cn/xref/linux-6.1.9/include/linux/netlink.h#229
+/// Netlink attribute header: a 16-bit length followed by a 16-bit type.
+struct NLattr {
+    nla_len: u16,
+    nla_type: u16,
+}
+
+/// Byte-buffer helpers for assembling netlink messages.
+pub trait VecExt {
+    /// Pads the buffer with zero bytes up to the next multiple of 4.
+    fn align4(&mut self);
+    /// Appends the in-memory bytes of `data` to the buffer.
+    fn push_ext<T: Sized>(&mut self, data: T);
+    /// Writes the in-memory bytes of `data` at `offset`, zero-extending the buffer first
+    /// if it is too short.
+    fn set_ext<T: Sized>(&mut self, offset: usize, data: T);
+}
+
+impl VecExt for Vec<u8> {
+    fn align4(&mut self) {
+        let padded_len = (self.len() + 3) & !3;
+        if padded_len > self.len() {
+            self.resize(padded_len, 0);
+        }
+    }
+
+    fn push_ext<T: Sized>(&mut self, data: T) {
+        let size = size_of::<T>();
+        // SAFETY: `data` is a live value of exactly `size` bytes for the duration of the
+        // borrow. NOTE(review): for types with padding the padding bytes are read too.
+        #[allow(unsafe_code)]
+        let raw = unsafe { slice::from_raw_parts(&data as *const T as *const u8, size) };
+        self.extend_from_slice(raw);
+    }
+
+    fn set_ext<T: Sized>(&mut self, offset: usize, data: T) {
+        let end = offset + size_of::<T>();
+        if self.len() < end {
+            self.resize(end, 0);
+        }
+        // SAFETY: same reasoning as in `push_ext`.
+        #[allow(unsafe_code)]
+        let raw =
+            unsafe { slice::from_raw_parts(&data as *const T as *const u8, size_of::<T>()) };
+        self[offset..end].copy_from_slice(raw);
+    }
+}
+
+// todo: net namespace
+/// Creates a kernel-side netlink socket for protocol `unit` and registers the protocol
+/// in `NL_TABLE`.
+///
+/// ## Parameters
+/// - `unit`: netlink protocol number; must be below `MAX_LINKS`
+/// - `cfg`: optional configuration (group count, flags, bind/unbind/compare callbacks)
+///
+/// ## Returns
+/// The newly created kernel socket.
+///
+/// ## Errors
+/// - `EINVAL` if `unit` is out of range
+/// - any error from `__netlink_create` or `netlink_insert` (propagated, not panicked on)
+///
+/// ## Notes
+/// - As in Linux `__netlink_kernel_create`, the first registration of a protocol
+///   initialises its table entry and sets the registration count to 1; every further
+///   registration only increments the count.
+pub fn netlink_kernel_create(
+    unit: usize,
+    cfg: Option<NetlinkKernelCfg>,
+) -> Result<NetlinkSock, SystemError> {
+    // Reject an invalid protocol number before allocating anything.
+    if unit >= MAX_LINKS {
+        return Err(SystemError::EINVAL);
+    }
+    // THIS_MODULE
+    let mut nlk: NetlinkSock = NetlinkSock::new();
+    // NOTE(review): `sk` is built from a clone taken before `nlk` is initialised below.
+    // Whether the inserted socket sees the protocol and flags depends on the clone
+    // semantics of `NetlinkSock` - confirm.
+    let sk: Arc<Mutex<Box<dyn NetlinkSocket>>> = Arc::new(Mutex::new(Box::new(nlk.clone())));
+    __netlink_create(&mut nlk, unit, 1)?;
+
+    // At least 32 groups, more if the configuration asks for them.
+    let groups: u32 = cfg.as_ref().map_or(32, |cfg| cfg.groups.max(32));
+    let listeners = Listeners::new();
+    // todo: design and implement the callbacks
+    // sk.sk_data_read = netlink_data_ready;
+    // if cfg.is_some() && cfg.unwrap().input.is_some(){
+    //     nlk.netlink_rcv = cfg.unwrap().input;
+    // }
+    netlink_insert(sk, 0)?;
+    nlk.flags |= NetlinkFlags::NETLINK_F_KERNEL_SOCKET.bits();
+
+    let mut nl_table = NL_TABLE.write();
+    if nl_table[unit].get_registered() == 0 {
+        // First registration of this protocol: initialise its table entry.
+        nl_table[unit].set_groups(groups);
+        if let Some(cfg) = cfg.as_ref() {
+            nl_table[unit].bind = cfg.bind.clone();
+            nl_table[unit].unbind = cfg.unbind.clone();
+            nl_table[unit].set_flags(cfg.flags);
+            if cfg.compare.is_some() {
+                nl_table[unit].compare = cfg.compare.clone();
+            }
+        }
+        nl_table[unit].set_registered(1);
+    } else {
+        // Already registered: the fresh listeners are not needed, just bump the count.
+        drop(listeners);
+        let registered = nl_table[unit].get_registered();
+        nl_table[unit].set_registered(registered + 1);
+    }
+    Ok(nlk)
+}
+
+/// Initialises `nlk` for protocol `unit`; `kern` is stored (truncated to `u32`) as the
+/// initial flags value. Always returns `Ok(0)`.
+fn __netlink_create(nlk: &mut NetlinkSock, unit: usize, kern: usize) -> Result<i32, SystemError> {
+    // Remaining initialisation parameters are not ported yet.
+    nlk.protocol = unit;
+    nlk.flags = kern as u32;
+    Ok(0)
+}
+
+/// Data-ready notification for a netlink socket.
+///
+/// Currently a stub: `nlk` is ignored and the function always returns `Ok(())`.
+pub fn sk_data_ready(nlk: Arc<NetlinkSock>) -> Result<(), SystemError> {
+    // TODO: wake up waiters
+    Ok(())
+}

+ 56 - 0
kernel/src/net/socket/netlink/netlink_proto.rs

@@ -0,0 +1,56 @@
+use bitmap::{traits::BitMapOps, AllocBitmap};
+use core::intrinsics::unlikely;
+use system_error::SystemError;
+
+use crate::libs::lazy_init::Lazy;
+pub const PROTO_INUSE_NR: usize = 64;
+// pub static mut PROTO_INUSE_IDX: Lazy<AllocBitmap> =  Lazy::new();
+// pub static PROTO_INUSE_IDX: Lazy<AllocBitmap> = Lazy::new(<AllocBitmap::new(PROTO_INUSE_NR));
+/// Trait describing the operation set of a protocol.
+pub trait Protocol {
+    /// Closes the protocol instance.
+    fn close(&self);
+    // fn first_false_index(&self, proto_inuse_idx:usize, proto_inuse_nr:usize)->usize;
+}
+/// Struct holding the data of a protocol operation set.
+pub struct Proto<'a> {
+    // Human-readable protocol name.
+    name: &'a str,
+    // owner: THIS_MODULE,
+    // Object size recorded for the protocol.
+    obj_size: usize,
+    // Index assigned at registration time; `None` until one is assigned.
+    inuse_idx: Option<usize>,
+}
+impl Protocol for Proto<'_> {
+    // No-op for now.
+    fn close(&self) {}
+}
+/// Static instance of the operation-set struct, used to register the netlink protocol.
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/netlink/af_netlink.c#634
+pub static mut NETLINK_PROTO: Proto = Proto {
+    name: "NETLINK",
+    // owner: THIS_MODULE,
+    // NOTE(review): this records the size of `Proto` itself; the Linux original records
+    // the size of the netlink socket structure - confirm which is intended.
+    obj_size: core::mem::size_of::<Proto>(),
+    // Index assigned at run time
+    inuse_idx: None,
+};
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/core/sock.c?fi=proto_register#3853
+/// Registers a protocol.
+///
+/// Slab allocation is not supported: a non-zero `alloc_slab` logs a message and fails
+/// with `ENOBUFS`. Otherwise the result of `assign_proto_idx` is returned.
+pub fn proto_register(proto: &mut Proto, alloc_slab: i32) -> Result<i32, SystemError> {
+    if alloc_slab == 0 {
+        return assign_proto_idx(proto);
+    }
+    log::info!("TODO: netlink_proto: slab allocation not supported\n");
+    Err(SystemError::ENOBUFS)
+}
+// https://code.dragonos.org.cn/xref/linux-6.1.9/net/core/sock.c?fi=proto_register#3752
+/// Assigns an in-use index to the protocol.
+///
+/// The actual allocation is not implemented yet; the function only fails with `ENOSPC`
+/// when `inuse_idx` already equals the last slot (`PROTO_INUSE_NR - 1`) and otherwise
+/// returns `Ok(0)` without modifying `prot`.
+pub fn assign_proto_idx(prot: &mut Proto) -> Result<i32, SystemError> {
+    // prot.inuse_idx = unsafe { PROTO_INUSE_IDX.first_false_index() };
+    // No free index was found.
+    let exhausted = prot.inuse_idx == Some(PROTO_INUSE_NR - 1);
+    if unlikely(exhausted) {
+        log::info!("PROTO_INUSE_NR exhausted\n");
+        return Err(SystemError::ENOSPC);
+    }
+    // Mark the index as taken.
+    // unsafe { PROTO_INUSE_IDX.set((prot.inuse_idx).unwrap(), true) };
+    Ok(0)
+}

+ 109 - 0
kernel/src/net/socket/netlink/skbuff.rs

@@ -0,0 +1,109 @@
+use super::af_netlink::{NetlinkSock, NetlinkSocket};
+use crate::libs::{mutex::Mutex, rwlock::RwLock};
+use alloc::{boxed::Box, sync::Arc};
+// Former approach: a wrapper around smoltcp::PacketBuffer that handled the socket-buffer
+// (skb) operations of the netlink protocol.
+// Abandoned for now; a simpler representation of the skb is being tried instead.
+/// Socket buffer (skb) used by the netlink code.
+///
+/// Besides the owning socket `sk`, all fields are plain integers / integer arrays;
+/// no payload bytes are stored in this struct.
+#[derive(Debug, Clone)]
+pub struct SkBuff {
+    // Socket that currently owns this buffer.
+    pub sk: Arc<Mutex<Box<dyn NetlinkSocket>>>,
+    // Length field; `is_empty()` reports whether it is 0.
+    pub len: u32,
+    pub pkt_type: u32,
+    pub mark: u32,
+    pub queue_mapping: u32,
+    pub protocol: u32,
+    pub vlan_present: u32,
+    pub vlan_tci: u32,
+    pub vlan_proto: u32,
+    pub priority: u32,
+    pub ingress_ifindex: u32,
+    pub ifindex: u32,
+    pub tc_index: u32,
+    pub cb: [u32; 5],
+    pub hash: u32,
+    pub tc_classid: u32,
+    pub data: u32,
+    pub data_end: u32,
+    pub napi_id: u32,
+    pub family: u32,
+    pub remote_ip4: u32,
+    pub local_ip4: u32,
+    pub remote_ip6: [u32; 4],
+    pub local_ip6: [u32; 4],
+    pub remote_port: u32,
+    pub local_port: u32,
+    pub data_meta: u32,
+    pub tstamp: u64,
+    pub wire_len: u32,
+    pub gso_segs: u32,
+    pub gso_size: u32,
+    pub tstamp_type: u8,
+    pub _bitfield_align_1: [u8; 0],
+    pub hwtstamp: u64,
+}
+impl SkBuff {
+    /// Creates a zeroed buffer owned by a freshly created `NetlinkSock`.
+    pub fn new() -> Self {
+        // Placeholder owner; the real owner is assigned later.
+        let owner: Arc<Mutex<Box<dyn NetlinkSocket>>> =
+            Arc::new(Mutex::new(Box::new(NetlinkSock::new())));
+        Self {
+            sk: owner,
+            len: 0,
+            pkt_type: 0,
+            mark: 0,
+            queue_mapping: 0,
+            protocol: 0,
+            vlan_present: 0,
+            vlan_tci: 0,
+            vlan_proto: 0,
+            priority: 0,
+            ingress_ifindex: 0,
+            ifindex: 0,
+            tc_index: 0,
+            cb: [0; 5],
+            hash: 0,
+            tc_classid: 0,
+            data: 0,
+            data_end: 0,
+            napi_id: 0,
+            family: 0,
+            remote_ip4: 0,
+            local_ip4: 0,
+            remote_ip6: [0; 4],
+            local_ip6: [0; 4],
+            remote_port: 0,
+            local_port: 0,
+            data_meta: 0,
+            tstamp: 0,
+            wire_len: 0,
+            gso_segs: 0,
+            gso_size: 0,
+            tstamp_type: 0,
+            _bitfield_align_1: [0; 0],
+            hwtstamp: 0,
+        }
+    }
+    /// Returns `true` when the `len` field is zero.
+    pub fn is_empty(&self) -> bool {
+        let length = self.len;
+        length == 0
+    }
+}
+
+// Handles the overrun condition of a netlink socket.
+// Currently a stub: `sk` is ignored and nothing happens.
+pub fn netlink_overrun(sk: &Arc<Mutex<Box<dyn NetlinkSocket>>>) {
+    // Implementation of the function
+}
+
+// Checks whether a socket buffer (skb) is shared.
+// Currently a stub: `skb` is ignored and the result is always `false`.
+pub fn skb_shared(skb: &RwLock<SkBuff>) -> bool {
+    // Implementation of the function
+    false
+}
+
+/// Handles an orphaned socket buffer (skb).
+///
+/// Orphaning a buffer means it is no longer associated with any socket, typically
+/// because it was sent with the MSG_DONTWAIT flag, which tells the kernel not to wait
+/// for resources (such as memory) but to send the packet as quickly as possible.
+///
+/// Currently a stub: `skb` is ignored and nothing happens.
+pub fn skb_orphan(skb: &Arc<RwLock<SkBuff>>) {
+    // TODO: Implementation of the function
+}
+
+// Empty placeholder; not implemented yet.
+fn skb_recv_datagram() {}
+
+// Empty placeholder; not implemented yet.
+fn skb_try_recv_datagram() {}
+
+// Empty placeholder; not implemented yet.
+fn skb_try_recv_from_queue() {}

+ 34 - 0
kernel/src/net/socket/netlink/sock.rs

@@ -0,0 +1,34 @@
+// Sock flags in Rust
+/// Socket flags, modelled as a plain enum: a value holds exactly one flag at a time.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SockFlags {
+    SockDead,
+    SockDone,
+    SockUrginline,
+    SockKeepopen,
+    SockLinger,
+    SockDestroy,
+    SockBroadcast,
+    SockTimestamp,
+    SockZapped,
+    SockUseWriteQueue,          // whether to call sk->sk_write_space in sock_wfree
+    SockDbg,                    // %SO_DEBUG setting
+    SockRcvtstamp,              // %SO_TIMESTAMP setting
+    SockRcvtstampns,            // %SO_TIMESTAMPNS setting
+    SockLocalroute,             // route locally only, %SO_DONTROUTE setting
+    SockMemalloc,               // VM depends on this socket for swapping
+    SockTimestampingRxSoftware, // %SOF_TIMESTAMPING_RX_SOFTWARE
+    SockFasync,                 // fasync() active
+    SockRxqOvfl,
+    SockZerocopy,   // buffers from userspace
+    SockWifiStatus, // push wifi status to userspace
+    SockNofcs,      // Tell NIC not to do the Ethernet FCS.
+    // Will use last 4 bytes of packet sent from
+    // user-space instead.
+    SockFilterLocked,   // Filter cannot be changed anymore
+    SockSelectErrQueue, // Wake select on error queue
+    SockRcuFree,        // wait rcu grace period in sk_destruct()
+    SockTxtime,
+    SockXdp,       // XDP is attached
+    SockTstampNew, // Indicates 64 bit timestamps always
+    SockRcvmark,   // Receive SO_MARK ancillary data with packet
+}

+ 0 - 239
kernel/src/net/socket/unix.rs

@@ -1,239 +0,0 @@
-use alloc::{boxed::Box, sync::Arc, vec::Vec};
-use system_error::SystemError;
-
-use crate::{libs::spinlock::SpinLock, net::Endpoint};
-
-use super::{
-    handle::GlobalSocketHandle, PosixSocketHandleItem, Socket, SocketInode, SocketMetadata,
-    SocketOptions, SocketType,
-};
-
-#[derive(Debug, Clone)]
-pub struct StreamSocket {
-    metadata: SocketMetadata,
-    buffer: Arc<SpinLock<Vec<u8>>>,
-    peer_inode: Option<Arc<SocketInode>>,
-    handle: GlobalSocketHandle,
-    posix_item: Arc<PosixSocketHandleItem>,
-}
-
-impl StreamSocket {
-    /// 默认的元数据缓冲区大小
-    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
-    /// 默认的缓冲区大小
-    pub const DEFAULT_BUF_SIZE: usize = 64 * 1024;
-
-    /// # 创建一个 Stream Socket
-    ///
-    /// ## 参数
-    /// - `options`: socket选项
-    pub fn new(options: SocketOptions) -> Self {
-        let buffer = Arc::new(SpinLock::new(Vec::with_capacity(Self::DEFAULT_BUF_SIZE)));
-
-        let metadata = SocketMetadata::new(
-            SocketType::Unix,
-            Self::DEFAULT_BUF_SIZE,
-            Self::DEFAULT_BUF_SIZE,
-            Self::DEFAULT_METADATA_BUF_SIZE,
-            options,
-        );
-
-        let posix_item = Arc::new(PosixSocketHandleItem::new(None));
-
-        Self {
-            metadata,
-            buffer,
-            peer_inode: None,
-            handle: GlobalSocketHandle::new_kernel_handle(),
-            posix_item,
-        }
-    }
-}
-
-impl Socket for StreamSocket {
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem> {
-        self.posix_item.clone()
-    }
-    fn socket_handle(&self) -> GlobalSocketHandle {
-        self.handle
-    }
-
-    fn close(&mut self) {}
-
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint) {
-        let mut buffer = self.buffer.lock_irqsave();
-
-        let len = core::cmp::min(buf.len(), buffer.len());
-        buf[..len].copy_from_slice(&buffer[..len]);
-
-        let _ = buffer.split_off(len);
-
-        (Ok(len), Endpoint::Inode(self.peer_inode.clone()))
-    }
-
-    fn write(&self, buf: &[u8], _to: Option<Endpoint>) -> Result<usize, SystemError> {
-        if self.peer_inode.is_none() {
-            return Err(SystemError::ENOTCONN);
-        }
-
-        let peer_inode = self.peer_inode.clone().unwrap();
-        let len = peer_inode.inner().write_buffer(buf)?;
-        Ok(len)
-    }
-
-    fn connect(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        if self.peer_inode.is_some() {
-            return Err(SystemError::EISCONN);
-        }
-
-        if let Endpoint::Inode(inode) = endpoint {
-            self.peer_inode = inode;
-            Ok(())
-        } else {
-            Err(SystemError::EINVAL)
-        }
-    }
-
-    fn write_buffer(&self, buf: &[u8]) -> Result<usize, SystemError> {
-        let mut buffer = self.buffer.lock_irqsave();
-
-        let len = buf.len();
-        if buffer.capacity() - buffer.len() < len {
-            return Err(SystemError::ENOBUFS);
-        }
-        buffer.extend_from_slice(buf);
-
-        Ok(len)
-    }
-
-    fn metadata(&self) -> SocketMetadata {
-        self.metadata.clone()
-    }
-
-    fn box_clone(&self) -> Box<dyn Socket> {
-        Box::new(self.clone())
-    }
-
-    fn as_any_ref(&self) -> &dyn core::any::Any {
-        self
-    }
-
-    fn as_any_mut(&mut self) -> &mut dyn core::any::Any {
-        self
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct SeqpacketSocket {
-    metadata: SocketMetadata,
-    buffer: Arc<SpinLock<Vec<u8>>>,
-    peer_inode: Option<Arc<SocketInode>>,
-    handle: GlobalSocketHandle,
-    posix_item: Arc<PosixSocketHandleItem>,
-}
-
-impl SeqpacketSocket {
-    /// 默认的元数据缓冲区大小
-    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
-    /// 默认的缓冲区大小
-    pub const DEFAULT_BUF_SIZE: usize = 64 * 1024;
-
-    /// # 创建一个 Seqpacket Socket
-    ///
-    /// ## 参数
-    /// - `options`: socket选项
-    pub fn new(options: SocketOptions) -> Self {
-        let buffer = Arc::new(SpinLock::new(Vec::with_capacity(Self::DEFAULT_BUF_SIZE)));
-
-        let metadata = SocketMetadata::new(
-            SocketType::Unix,
-            Self::DEFAULT_BUF_SIZE,
-            Self::DEFAULT_BUF_SIZE,
-            Self::DEFAULT_METADATA_BUF_SIZE,
-            options,
-        );
-
-        let posix_item = Arc::new(PosixSocketHandleItem::new(None));
-
-        Self {
-            metadata,
-            buffer,
-            peer_inode: None,
-            handle: GlobalSocketHandle::new_kernel_handle(),
-            posix_item,
-        }
-    }
-}
-
-impl Socket for SeqpacketSocket {
-    fn posix_item(&self) -> Arc<PosixSocketHandleItem> {
-        self.posix_item.clone()
-    }
-    fn close(&mut self) {}
-
-    fn read(&self, buf: &mut [u8]) -> (Result<usize, SystemError>, Endpoint) {
-        let mut buffer = self.buffer.lock_irqsave();
-
-        let len = core::cmp::min(buf.len(), buffer.len());
-        buf[..len].copy_from_slice(&buffer[..len]);
-
-        let _ = buffer.split_off(len);
-
-        (Ok(len), Endpoint::Inode(self.peer_inode.clone()))
-    }
-
-    fn write(&self, buf: &[u8], _to: Option<Endpoint>) -> Result<usize, SystemError> {
-        if self.peer_inode.is_none() {
-            return Err(SystemError::ENOTCONN);
-        }
-
-        let peer_inode = self.peer_inode.clone().unwrap();
-        let len = peer_inode.inner().write_buffer(buf)?;
-        Ok(len)
-    }
-
-    fn connect(&mut self, endpoint: Endpoint) -> Result<(), SystemError> {
-        if self.peer_inode.is_some() {
-            return Err(SystemError::EISCONN);
-        }
-
-        if let Endpoint::Inode(inode) = endpoint {
-            self.peer_inode = inode;
-            Ok(())
-        } else {
-            Err(SystemError::EINVAL)
-        }
-    }
-
-    fn write_buffer(&self, buf: &[u8]) -> Result<usize, SystemError> {
-        let mut buffer = self.buffer.lock_irqsave();
-
-        let len = buf.len();
-        if buffer.capacity() - buffer.len() < len {
-            return Err(SystemError::ENOBUFS);
-        }
-        buffer.extend_from_slice(buf);
-
-        Ok(len)
-    }
-
-    fn socket_handle(&self) -> GlobalSocketHandle {
-        self.handle
-    }
-
-    fn metadata(&self) -> SocketMetadata {
-        self.metadata.clone()
-    }
-
-    fn box_clone(&self) -> Box<dyn Socket> {
-        Box::new(self.clone())
-    }
-
-    fn as_any_ref(&self) -> &dyn core::any::Any {
-        self
-    }
-
-    fn as_any_mut(&mut self) -> &mut dyn core::any::Any {
-        self
-    }
-}

+ 37 - 0
kernel/src/net/socket/unix/mod.rs

@@ -0,0 +1,37 @@
+pub(crate) mod seqpacket;
+mod stream;
+use crate::{filesystem::vfs::InodeId, libs::rwlock::RwLock, net::socket::*};
+use alloc::sync::Arc;
+use hashbrown::HashMap;
+use system_error::SystemError::{self, *};
+/// Marker type for the UNIX-domain socket family; it carries no data and only
+/// hosts the `family::Family` implementation and the `new_pairs` constructor.
+pub struct Unix;
+
+lazy_static! {
+    /// Global table mapping the inode id of a bound filesystem path to the socket
+    /// endpoint bound there. Entries are inserted by `bind` and looked up by
+    /// `connect` when the caller passes an `Endpoint::Unixpath`.
+    pub static ref INODE_MAP: RwLock<HashMap<InodeId, Endpoint>> = RwLock::new(HashMap::new());
+}
+
+/// Builds the inode for a new, unconnected UNIX-domain socket of `sock_type`.
+///
+/// `Stream` and `Datagram` are both served by the stream implementation and
+/// `SeqPacket` by the seqpacket implementation (created in blocking mode).
+/// Every other type is rejected with `EPROTONOSUPPORT`.
+fn create_unix_socket(sock_type: Type) -> Result<Arc<Inode>, SystemError> {
+    if matches!(sock_type, Type::Stream | Type::Datagram) {
+        return stream::StreamSocket::new_inode();
+    }
+    if matches!(sock_type, Type::SeqPacket) {
+        return seqpacket::SeqpacketSocket::new_inode(false);
+    }
+    Err(EPROTONOSUPPORT)
+}
+
+impl family::Family for Unix {
+    /// Creates a UNIX-domain socket of type `stype`.
+    ///
+    /// The protocol number is ignored; the socket type alone selects the
+    /// implementation. Errors from `create_unix_socket` are returned unchanged.
+    fn socket(stype: Type, _protocol: u32) -> Result<Arc<Inode>, SystemError> {
+        create_unix_socket(stype)
+    }
+}
+
+impl Unix {
+    /// Creates two sockets of `socket_type` that are already connected to each other.
+    ///
+    /// # Errors
+    /// Returns `EPROTONOSUPPORT` for socket types that have no UNIX-domain
+    /// implementation, or whatever error the selected implementation reports.
+    pub fn new_pairs(socket_type: Type) -> Result<(Arc<Inode>, Arc<Inode>), SystemError> {
+        log::debug!("socket_type {:?}", socket_type);
+        match socket_type {
+            Type::SeqPacket => seqpacket::SeqpacketSocket::new_pairs(),
+            Type::Stream | Type::Datagram => stream::StreamSocket::new_pairs(),
+            // Report unsupported types with the same error as `create_unix_socket`
+            // instead of panicking the kernel through `todo!()`.
+            _ => Err(EPROTONOSUPPORT),
+        }
+    }
+}

+ 260 - 0
kernel/src/net/socket/unix/seqpacket/inner.rs

@@ -0,0 +1,260 @@
+use alloc::string::String;
+use alloc::{collections::VecDeque, sync::Arc};
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+use super::SeqpacketSocket;
+use crate::{
+    libs::mutex::Mutex,
+    net::socket::{buffer::Buffer, endpoint::Endpoint, Inode, ShutdownTemp},
+};
+use system_error::SystemError::{self, *};
+
+/// State of a SEQPACKET socket that is neither listening nor connected.
+#[derive(Debug)]
+pub(super) struct Init {
+    /// Endpoint this socket is bound to, if any. Only `Endpoint::Inode` values
+    /// are ever stored here (enforced by `bind`).
+    inode: Option<Endpoint>,
+}
+
+impl Init {
+    /// Creates an unbound state.
+    pub(super) fn new() -> Self {
+        Self { inode: None }
+    }
+
+    /// Binds the socket to `epoint_to_bind`.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket is already bound or the endpoint is not an
+    /// `Endpoint::Inode`.
+    pub(super) fn bind(&mut self, epoint_to_bind: Endpoint) -> Result<(), SystemError> {
+        if self.inode.is_some() {
+            log::error!("the socket is already bound");
+            return Err(EINVAL);
+        }
+        match epoint_to_bind {
+            Endpoint::Inode(_) => {
+                self.inode = Some(epoint_to_bind);
+                Ok(())
+            }
+            _ => Err(EINVAL),
+        }
+    }
+
+    /// Replaces the path of the bound endpoint with `sun_path` and returns a
+    /// copy of the updated endpoint.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket has not been bound to an inode endpoint yet. The
+    /// stored endpoint is left untouched in that case.
+    pub fn bind_path(&mut self, sun_path: String) -> Result<Endpoint, SystemError> {
+        match self.inode.take() {
+            Some(Endpoint::Inode((inode, _))) => {
+                let epoint = Endpoint::Inode((inode, sun_path));
+                self.inode = Some(epoint.clone());
+                Ok(epoint)
+            }
+            None => {
+                log::error!("the socket is not bound");
+                Err(EINVAL)
+            }
+            other => {
+                // Put the unexpected endpoint back so a failed call does not
+                // silently unbind the socket.
+                self.inode = other;
+                Err(SystemError::EINVAL)
+            }
+        }
+    }
+
+    /// Returns the endpoint the socket is bound to, if any.
+    pub fn endpoint(&self) -> Option<&Endpoint> {
+        self.inode.as_ref()
+    }
+}
+
+/// State of a SEQPACKET socket that accepts incoming connections.
+#[derive(Debug)]
+pub(super) struct Listener {
+    /// Endpoint this listener is bound to.
+    inode: Endpoint,
+    /// Maximum number of connections that may wait in `incoming_conns`.
+    backlog: AtomicUsize,
+    /// Server-side inodes created by `push_incoming` that have not been handed
+    /// out by `try_accept` yet.
+    incoming_conns: Mutex<VecDeque<Arc<Inode>>>,
+}
+
+impl Listener {
+    /// Upper bound applied to every requested backlog.
+    const MAX_BACKLOG: usize = 1024;
+
+    /// Creates a listener bound to `inode` whose queue holds at most `backlog`
+    /// pending connections (clamped to `MAX_BACKLOG`).
+    pub(super) fn new(inode: Endpoint, backlog: usize) -> Self {
+        log::debug!("backlog {}", backlog);
+        let back = backlog.min(Self::MAX_BACKLOG);
+        Self {
+            inode,
+            backlog: AtomicUsize::new(back),
+            incoming_conns: Mutex::new(VecDeque::with_capacity(back)),
+        }
+    }
+
+    /// Returns the endpoint this listener is bound to.
+    pub(super) fn endpoint(&self) -> &Endpoint {
+        &self.inode
+    }
+
+    /// Removes the oldest pending connection and returns its inode together
+    /// with the endpoint of the connecting peer.
+    ///
+    /// # Errors
+    /// * `EAGAIN_OR_EWOULDBLOCK` if no connection is pending.
+    /// * `EINVAL` if the queued inode does not wrap a `SeqpacketSocket`.
+    /// * `ENOTCONN` if the queued socket is not connected or has no peer endpoint.
+    pub(super) fn try_accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        let mut incoming_conns = self.incoming_conns.lock();
+        log::debug!(" incom len {}", incoming_conns.len());
+        let conn = incoming_conns
+            .pop_front()
+            .ok_or(SystemError::EAGAIN_OR_EWOULDBLOCK)?;
+        let socket =
+            Arc::downcast::<SeqpacketSocket>(conn.inner()).map_err(|_| SystemError::EINVAL)?;
+        let peer = match &*socket.inner.read() {
+            // The peer endpoint is optional; report its absence as an error
+            // instead of panicking.
+            Inner::Connected(connected) => connected
+                .peer_endpoint()
+                .cloned()
+                .ok_or(SystemError::ENOTCONN)?,
+            _ => return Err(SystemError::ENOTCONN),
+        };
+
+        Ok((Inode::new(socket), peer))
+    }
+
+    /// Updates the backlog of an already listening socket, applying the same
+    /// clamp as `new`.
+    pub(super) fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        self.backlog
+            .store(backlog.min(Self::MAX_BACKLOG), Ordering::Relaxed);
+        Ok(())
+    }
+
+    /// Creates the server side of a new connection for the client identified by
+    /// `client_epoint`, queues it for `try_accept`, and returns the client side.
+    ///
+    /// # Errors
+    /// * `EAGAIN_OR_EWOULDBLOCK` if the pending queue already holds `backlog` entries.
+    /// * `EINVAL` if the listener's own endpoint is not an `Endpoint::Inode`.
+    pub(super) fn push_incoming(
+        &self,
+        client_epoint: Option<Endpoint>,
+    ) -> Result<Connected, SystemError> {
+        let mut incoming_conns = self.incoming_conns.lock();
+        if incoming_conns.len() >= self.backlog.load(Ordering::Relaxed) {
+            log::error!("the pending connection queue on the listening socket is full");
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+
+        let new_server = SeqpacketSocket::new(false);
+        let new_inode = Inode::new(new_server.clone());
+        // The accepted socket reports the listener's path as its own address.
+        let path = match &self.inode {
+            Endpoint::Inode((_, path)) => path.clone(),
+            _ => return Err(SystemError::EINVAL),
+        };
+
+        let (server_conn, client_conn) = Connected::new_pair(
+            Some(Endpoint::Inode((new_inode.clone(), path))),
+            client_epoint,
+        );
+        *new_server.inner.write() = Inner::Connected(server_conn);
+        incoming_conns.push_back(new_inode);
+
+        // TODO: epollin
+
+        Ok(client_conn)
+    }
+
+    /// Returns `true` if at least one connection is waiting to be accepted.
+    pub(super) fn is_acceptable(&self) -> bool {
+        !self.incoming_conns.lock().is_empty()
+    }
+}
+
+/// State of a SEQPACKET socket that is connected to a peer.
+#[derive(Debug)]
+pub struct Connected {
+    /// Endpoint of this side of the connection.
+    inode: Option<Endpoint>,
+    /// Endpoint of the other side of the connection.
+    peer_inode: Option<Endpoint>,
+    /// Buffer holding data that the peer has sent and this side has not read yet.
+    buffer: Arc<Buffer>,
+}
+
+impl Connected {
+    /// Default buffer size.
+    pub const DEFAULT_BUF_SIZE: usize = 64 * 1024;
+
+    /// Creates both halves of a connection: the first half is owned by `inode`
+    /// and the second by `peer_inode`. Each half gets its own receive buffer.
+    pub fn new_pair(
+        inode: Option<Endpoint>,
+        peer_inode: Option<Endpoint>,
+    ) -> (Connected, Connected) {
+        let this = Connected {
+            inode: inode.clone(),
+            peer_inode: peer_inode.clone(),
+            buffer: Buffer::new(),
+        };
+        let peer = Connected {
+            inode: peer_inode,
+            peer_inode: inode,
+            buffer: Buffer::new(),
+        };
+
+        (this, peer)
+    }
+
+    /// Replaces the peer endpoint.
+    pub fn set_peer_inode(&mut self, peer_epoint: Option<Endpoint>) {
+        self.peer_inode = peer_epoint;
+    }
+
+    /// Replaces the local endpoint.
+    pub fn set_inode(&mut self, epoint: Option<Endpoint>) {
+        self.inode = epoint;
+    }
+
+    /// Returns the local endpoint, if known.
+    pub fn endpoint(&self) -> Option<&Endpoint> {
+        self.inode.as_ref()
+    }
+
+    /// Returns the peer endpoint, if known.
+    pub fn peer_endpoint(&self) -> Option<&Endpoint> {
+        self.peer_inode.as_ref()
+    }
+
+    /// Resolves the peer endpoint to the peer's socket object.
+    ///
+    /// # Errors
+    /// `ENOTCONN` if no peer endpoint is recorded, `EINVAL` if the endpoint is
+    /// not an inode wrapping a `SeqpacketSocket`.
+    fn peer_socket(&self) -> Result<Arc<SeqpacketSocket>, SystemError> {
+        let peer_inode = match self.peer_inode.as_ref() {
+            Some(Endpoint::Inode((inode, _))) => inode,
+            Some(_) => return Err(SystemError::EINVAL),
+            None => return Err(SystemError::ENOTCONN),
+        };
+        Arc::downcast::<SeqpacketSocket>(peer_inode.inner()).map_err(|_| SystemError::EINVAL)
+    }
+
+    /// Reads pending data into `buf` without waiting.
+    ///
+    /// # Errors
+    /// `EAGAIN_OR_EWOULDBLOCK` if the receive buffer is empty.
+    pub fn try_read(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        if self.can_recv() {
+            self.recv_slice(buf)
+        } else {
+            // An empty buffer is a "try again" condition, not an invalid argument.
+            Err(SystemError::EAGAIN_OR_EWOULDBLOCK)
+        }
+    }
+
+    /// Sends `buf` to the peer without waiting.
+    ///
+    /// # Errors
+    /// `ENOBUFS` if the peer's receive buffer is full, or any error reported by
+    /// `can_send` / `send_slice`.
+    pub fn try_write(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        if self.can_send()? {
+            self.send_slice(buf)
+        } else {
+            log::debug!("can not send {:?}", String::from_utf8_lossy(buf));
+            Err(SystemError::ENOBUFS)
+        }
+    }
+
+    /// Returns `true` if there is data waiting to be read.
+    pub fn can_recv(&self) -> bool {
+        !self.buffer.is_read_buf_empty()
+    }
+
+    /// Returns `true` if the peer's receive buffer is not full.
+    ///
+    /// # Errors
+    /// `ENOTCONN` / `EINVAL` if the peer cannot be resolved, `EINVAL` if the
+    /// peer socket is not in the connected state.
+    pub fn can_send(&self) -> Result<bool, SystemError> {
+        let peer_socket = self.peer_socket()?;
+        let is_full = match &*peer_socket.inner.read() {
+            Inner::Connected(connected) => connected.buffer.is_read_buf_full(),
+            _ => return Err(SystemError::EINVAL),
+        };
+        Ok(!is_full)
+    }
+
+    /// Copies data from this side's receive buffer into `buf`.
+    pub fn recv_slice(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        self.buffer.read_read_buffer(buf)
+    }
+
+    /// Writes `buf` into the peer's receive buffer and wakes tasks waiting on
+    /// the peer socket.
+    ///
+    /// # Errors
+    /// `ENOTCONN` / `EINVAL` if the peer cannot be resolved, `EINVAL` if the
+    /// peer socket is not connected, or any error from the buffer write.
+    pub fn send_slice(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        let peer_socket = self.peer_socket()?;
+        // The buffer is only accessed through a shared reference, so a read
+        // lock on the peer state is sufficient here.
+        let written = match &*peer_socket.inner.read() {
+            Inner::Connected(connected) => connected.buffer.write_read_buffer(buf)?,
+            _ => return Err(SystemError::EINVAL),
+        };
+        peer_socket.wait_queue.wakeup(None);
+        Ok(written)
+    }
+
+    /// Shuts down part of the connection.
+    ///
+    /// Shutting down either direction is not implemented yet.
+    ///
+    /// # Errors
+    /// `EINVAL` if `how` selects nothing, `ENOSYS` if it requests a send or
+    /// receive shutdown (reported as an error instead of panicking the kernel).
+    pub fn shutdown(&self, how: ShutdownTemp) -> Result<(), SystemError> {
+        if how.is_empty() {
+            return Err(SystemError::EINVAL);
+        }
+        if how.is_send_shutdown() || how.is_recv_shutdown() {
+            return Err(SystemError::ENOSYS);
+        }
+
+        Ok(())
+    }
+}
+
+/// State of a SEQPACKET socket; the active variant decides which operations
+/// the socket accepts.
+#[derive(Debug)]
+pub(super) enum Inner {
+    /// Newly created socket that is neither listening nor connected.
+    Init(Init),
+    /// Socket that queues incoming connections for `accept`.
+    Listen(Listener),
+    /// Socket with an established connection to a peer.
+    Connected(Connected),
+}

+ 483 - 0
kernel/src/net/socket/unix/seqpacket/mod.rs

@@ -0,0 +1,483 @@
+pub mod inner;
+use alloc::{
+    string::String,
+    sync::{Arc, Weak},
+};
+use core::sync::atomic::{AtomicBool, Ordering};
+
+use crate::sched::SchedMode;
+use crate::{libs::rwlock::RwLock, net::socket::*};
+use inner::*;
+use system_error::SystemError;
+
+use super::INODE_MAP;
+
+type EP = EPollEventType;
+/// UNIX-domain socket of type SEQPACKET.
+#[derive(Debug)]
+pub struct SeqpacketSocket {
+    /// Current state (init / listening / connected) and its data.
+    inner: RwLock<Inner>,
+    /// Shutdown flags; both directions are marked as shut down by `close`.
+    shutdown: Shutdown,
+    /// Whether the socket is in non-blocking mode; consulted by `accept`.
+    is_nonblocking: AtomicBool,
+    /// Tasks blocked on this socket; woken when a peer connects or sends data.
+    wait_queue: WaitQueue,
+    /// Weak pointer to the `Arc` that owns this socket (set by `Arc::new_cyclic`).
+    self_ref: Weak<Self>,
+}
+
+impl SeqpacketSocket {
+    /// Default size of the metadata buffer.
+    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
+    /// Default size of the data buffer.
+    pub const DEFAULT_BUF_SIZE: usize = 64 * 1024;
+
+    /// Creates an unbound socket in the `Init` state.
+    pub fn new(is_nonblocking: bool) -> Arc<Self> {
+        Arc::new_cyclic(|me| Self {
+            inner: RwLock::new(Inner::Init(Init::new())),
+            shutdown: Shutdown::new(),
+            is_nonblocking: AtomicBool::new(is_nonblocking),
+            wait_queue: WaitQueue::default(),
+            self_ref: me.clone(),
+        })
+    }
+
+    /// Creates a socket together with its inode and binds the socket to that
+    /// inode (with an empty path) so that its local address can be queried later.
+    ///
+    /// # Errors
+    /// Propagates the error of the initial bind; `EINVAL` if the fresh socket is
+    /// unexpectedly not in the `Init` state.
+    pub fn new_inode(is_nonblocking: bool) -> Result<Arc<Inode>, SystemError> {
+        let socket = SeqpacketSocket::new(is_nonblocking);
+        let inode = Inode::new(socket.clone());
+        match &mut *socket.inner.write() {
+            // A failed bind is reported to the caller instead of being discarded.
+            Inner::Init(init) => init.bind(Endpoint::Inode((inode.clone(), String::from(""))))?,
+            _ => return Err(SystemError::EINVAL),
+        };
+        Ok(inode)
+    }
+
+    /// Creates a socket that starts out in the connected state `connected`.
+    pub fn new_connected(connected: Connected, is_nonblocking: bool) -> Arc<Self> {
+        Arc::new_cyclic(|me| Self {
+            inner: RwLock::new(Inner::Connected(connected)),
+            shutdown: Shutdown::new(),
+            is_nonblocking: AtomicBool::new(is_nonblocking),
+            wait_queue: WaitQueue::default(),
+            self_ref: me.clone(),
+        })
+    }
+
+    /// Creates two blocking sockets that are connected to each other and
+    /// returns their inodes.
+    pub fn new_pairs() -> Result<(Arc<Inode>, Arc<Inode>), SystemError> {
+        let socket0 = SeqpacketSocket::new(false);
+        let socket1 = SeqpacketSocket::new(false);
+        let inode0 = Inode::new(socket0.clone());
+        let inode1 = Inode::new(socket1.clone());
+
+        let (conn_0, conn_1) = Connected::new_pair(
+            Some(Endpoint::Inode((inode0.clone(), String::from("")))),
+            Some(Endpoint::Inode((inode1.clone(), String::from("")))),
+        );
+        *socket0.inner.write() = Inner::Connected(conn_0);
+        *socket1.inner.write() = Inner::Connected(conn_1);
+
+        Ok((inode0, inode1))
+    }
+
+    /// Takes one pending connection from the listener without waiting.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket is not listening, otherwise the listener's error.
+    fn try_accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        match &*self.inner.read() {
+            Inner::Listen(listen) => listen.try_accept(),
+            _ => {
+                log::error!("the socket is not listening");
+                Err(SystemError::EINVAL)
+            }
+        }
+    }
+
+    /// Returns `true` if the socket is listening and has a pending connection.
+    ///
+    /// A socket that is not listening is reported as not acceptable rather than
+    /// panicking, because this predicate is evaluated while waiting in `accept`.
+    fn is_acceptable(&self) -> bool {
+        match &*self.inner.read() {
+            Inner::Listen(listen) => listen.is_acceptable(),
+            _ => false,
+        }
+    }
+
+    /// Returns `true` if the peer socket has shut down both directions.
+    ///
+    /// # Errors
+    /// Propagates the error of `get_peer_name`; `EINVAL` if the peer endpoint is
+    /// not an inode wrapping a `SeqpacketSocket`.
+    fn is_peer_shutdown(&self) -> Result<bool, SystemError> {
+        let peer_shutdown = match self.get_peer_name()? {
+            Endpoint::Inode((inode, _)) => Arc::downcast::<SeqpacketSocket>(inode.inner())
+                .map_err(|_| SystemError::EINVAL)?
+                .shutdown
+                .get()
+                .is_both_shutdown(),
+            _ => return Err(SystemError::EINVAL),
+        };
+        Ok(peer_shutdown)
+    }
+
+    /// Returns `true` if data is waiting to be read.
+    ///
+    /// # Errors
+    /// `ENOTCONN` if the socket is not connected.
+    fn can_recv(&self) -> Result<bool, SystemError> {
+        let can = match &*self.inner.read() {
+            Inner::Connected(connected) => connected.can_recv(),
+            _ => return Err(SystemError::ENOTCONN),
+        };
+        Ok(can)
+    }
+
+    /// Returns whether the socket is in non-blocking mode.
+    fn is_nonblocking(&self) -> bool {
+        self.is_nonblocking.load(Ordering::Relaxed)
+    }
+
+    /// Switches the socket between blocking and non-blocking mode.
+    fn set_nonblocking(&self, nonblocking: bool) {
+        self.is_nonblocking.store(nonblocking, Ordering::Relaxed);
+    }
+}
+
+impl Socket for SeqpacketSocket {
+    /// Connects this socket to the listening socket identified by `endpoint`.
+    ///
+    /// `endpoint` is either the listener's inode or a filesystem path id that
+    /// is resolved through `INODE_MAP`.
+    ///
+    /// # Errors
+    /// * `ECONNREFUSED` if no socket is bound to the given path id.
+    /// * `EINVAL` for unsupported endpoint kinds, a peer that is not a
+    ///   SEQPACKET socket, a peer that is not listening, or a local socket that
+    ///   is itself listening.
+    /// * `EISCONN` if either side is already connected.
+    /// * The listener's error (e.g. `EAGAIN_OR_EWOULDBLOCK`) if its backlog is full.
+    fn connect(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        let peer_inode = match endpoint {
+            Endpoint::Inode((inode, _)) => inode,
+            Endpoint::Unixpath((inode_id, _)) => {
+                let inode_guard = INODE_MAP.read_irqsave();
+                match inode_guard.get(&inode_id) {
+                    Some(Endpoint::Inode((inode, _))) => inode.clone(),
+                    Some(_) => return Err(SystemError::EINVAL),
+                    // Nothing is bound to this path: refuse instead of panicking.
+                    None => return Err(SystemError::ECONNREFUSED),
+                }
+            }
+            _ => return Err(SystemError::EINVAL),
+        };
+        // The remote side acts as the server.
+        let remote_socket = Arc::downcast::<SeqpacketSocket>(peer_inode.inner())
+            .map_err(|_| SystemError::EINVAL)?;
+
+        let client_epoint = match &mut *self.inner.write() {
+            Inner::Init(init) => match init.endpoint().cloned() {
+                Some(end) => {
+                    log::debug!("bind when connect");
+                    Some(end)
+                }
+                None => {
+                    log::debug!("not bind when connect");
+                    // Bind to a fresh inode so the peer can learn our address.
+                    let me = self.self_ref.upgrade().ok_or(SystemError::EINVAL)?;
+                    let epoint = Endpoint::Inode((Inode::new(me), String::from("")));
+                    init.bind(epoint.clone())?;
+                    Some(epoint)
+                }
+            },
+            Inner::Listen(_) => return Err(SystemError::EINVAL),
+            Inner::Connected(_) => return Err(SystemError::EISCONN),
+        };
+        // NOTE: blocking vs. non-blocking connect is not distinguished yet.
+        // The listener creates the server-side socket, queues it for `accept`
+        // and hands back the client-side connection state.
+        let outcome = match &*remote_socket.inner.read() {
+            Inner::Listen(listener) => {
+                // A full backlog is reported to the caller instead of panicking.
+                let connected = listener.push_incoming(client_epoint)?;
+                *self.inner.write() = Inner::Connected(connected);
+                log::debug!("try to wake up");
+
+                remote_socket.wait_queue.wakeup(None);
+                Ok(())
+            }
+            Inner::Init(_) => {
+                log::debug!("init einval");
+                Err(SystemError::EINVAL)
+            }
+            Inner::Connected(_) => Err(SystemError::EISCONN),
+        };
+        outcome
+    }
+
+    /// Binds this socket to a filesystem path: records the path in the socket's
+    /// endpoint and registers the endpoint in `INODE_MAP` under the path's inode id.
+    ///
+    /// # Errors
+    /// `EINVAL` if `endpoint` is not a `Unixpath`, or if the socket is already
+    /// listening or connected.
+    fn bind(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        match endpoint {
+            Endpoint::Unixpath((inodeid, path)) => {
+                let inode = match &mut *self.inner.write() {
+                    Inner::Init(init) => init.bind_path(path)?,
+                    _ => {
+                        log::error!("socket has listen or connected");
+                        return Err(SystemError::EINVAL);
+                    }
+                };
+
+                INODE_MAP.write_irqsave().insert(inodeid, inode);
+                Ok(())
+            }
+            _ => Err(SystemError::EINVAL),
+        }
+    }
+
+    /// Shuts down part of a connected socket.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket is not connected, otherwise the connection's result.
+    fn shutdown(&self, how: ShutdownTemp) -> Result<(), SystemError> {
+        log::debug!("seqpacket shutdown");
+        // Only shared access to the state is needed.
+        match &*self.inner.read() {
+            Inner::Connected(connected) => connected.shutdown(how),
+            _ => Err(SystemError::EINVAL),
+        }
+    }
+
+    /// Puts the socket into the listening state, or updates the backlog if it
+    /// is already listening.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket is not bound or is already connected.
+    fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        let mut state = self.inner.write();
+        log::debug!("listen into socket");
+        let epoint = match &*state {
+            Inner::Init(init) => init.endpoint().ok_or(SystemError::EINVAL)?.clone(),
+            Inner::Listen(listener) => return listener.listen(backlog),
+            Inner::Connected(_) => {
+                log::error!("the socket is connected");
+                return Err(SystemError::EINVAL);
+            }
+        };
+
+        *state = Inner::Listen(Listener::new(epoint, backlog));
+
+        Ok(())
+    }
+
+    /// Accepts one pending connection and returns the new socket's inode and
+    /// the peer's endpoint.
+    ///
+    /// A blocking socket waits (interruptibly) until a connection is pending; a
+    /// non-blocking socket returns immediately.
+    ///
+    /// # Errors
+    /// * `EINVAL` if the socket is not listening.
+    /// * `EAGAIN_OR_EWOULDBLOCK` if the socket is non-blocking and nothing is pending.
+    /// * The wait's error if the wait is interrupted.
+    fn accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        // Fail fast: waiting on a socket that is not listening can never succeed.
+        if !matches!(&*self.inner.read(), Inner::Listen(_)) {
+            log::error!("the socket is not listening");
+            return Err(SystemError::EINVAL);
+        }
+        if self.is_nonblocking() {
+            return self.try_accept();
+        }
+        loop {
+            wq_wait_event_interruptible!(self.wait_queue, self.is_acceptable(), {})?;
+            match self.try_accept() {
+                Ok(accepted) => return Ok(accepted),
+                // Another task took the connection first; wait for the next one.
+                Err(SystemError::EAGAIN_OR_EWOULDBLOCK) => continue,
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    /// Socket options are not implemented; the call is logged and accepted.
+    fn set_option(
+        &self,
+        _level: crate::net::socket::OptionsLevel,
+        _optname: usize,
+        _optval: &[u8],
+    ) -> Result<(), SystemError> {
+        log::warn!("setsockopt is not implemented");
+        Ok(())
+    }
+
+    /// Returns the queue on which tasks wait for events on this socket.
+    fn wait_queue(&self) -> &WaitQueue {
+        &self.wait_queue
+    }
+
+    /// Marks both directions as shut down and wakes tasks waiting on the peer
+    /// so that blocked receivers can observe the end of the connection.
+    fn close(&self) -> Result<(), SystemError> {
+        log::debug!("seqpacket close");
+        self.shutdown.recv_shutdown();
+        self.shutdown.send_shutdown();
+        if let Ok(Endpoint::Inode((inode, _))) = self.get_peer_name() {
+            if let Ok(peer) = Arc::downcast::<SeqpacketSocket>(inode.inner()) {
+                peer.wait_queue.wakeup(None);
+            }
+        }
+        Ok(())
+    }
+
+    /// Returns the address of the connected peer.
+    ///
+    /// # Errors
+    /// `ENOTCONN` if the socket is not connected, `EAGAIN_OR_EWOULDBLOCK` if the
+    /// connection has no peer endpoint recorded.
+    fn get_peer_name(&self) -> Result<Endpoint, SystemError> {
+        let endpoint = match &*self.inner.read() {
+            Inner::Connected(connected) => connected.peer_endpoint().cloned(),
+            _ => return Err(SystemError::ENOTCONN),
+        };
+
+        endpoint.ok_or(SystemError::EAGAIN_OR_EWOULDBLOCK)
+    }
+
+    /// Returns the local address of the socket.
+    ///
+    /// # Errors
+    /// `EAGAIN_OR_EWOULDBLOCK` if the socket has no local endpoint yet.
+    fn get_name(&self) -> Result<Endpoint, SystemError> {
+        let endpoint = match &*self.inner.read() {
+            Inner::Init(init) => init.endpoint().cloned(),
+            Inner::Listen(listener) => Some(listener.endpoint().clone()),
+            Inner::Connected(connected) => connected.endpoint().cloned(),
+        };
+
+        endpoint.ok_or(SystemError::EAGAIN_OR_EWOULDBLOCK)
+    }
+
+    /// Socket options are not implemented; nothing is written and length 0 is returned.
+    fn get_option(
+        &self,
+        _level: crate::net::socket::OptionsLevel,
+        _name: usize,
+        _value: &mut [u8],
+    ) -> Result<usize, SystemError> {
+        log::warn!("getsockopt is not implemented");
+        Ok(0)
+    }
+
+    /// Same as `recv` without flags.
+    fn read(&self, buffer: &mut [u8]) -> Result<usize, SystemError> {
+        self.recv(buffer, crate::net::socket::MessageFlag::empty())
+    }
+
+    /// Receives data into `buffer` and returns the number of bytes read.
+    ///
+    /// Without `DONTWAIT` the call waits (interruptibly) for data. It returns
+    /// `Ok(0)` once the peer has shut down and no data is left.
+    ///
+    /// # Errors
+    /// * `EOPNOTSUPP_OR_ENOTSUP` if `OOB` is requested.
+    /// * `ENOTCONN` if the socket is not connected.
+    /// * `EAGAIN_OR_EWOULDBLOCK` if `DONTWAIT` is set and no data is available.
+    fn recv(
+        &self,
+        buffer: &mut [u8],
+        flags: crate::net::socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        self.do_recv(buffer, flags).map(|(len, _)| len)
+    }
+
+    /// Message-based receive is not implemented.
+    fn recv_msg(
+        &self,
+        _msg: &mut crate::net::syscall::MsgHdr,
+        _flags: crate::net::socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Sends `buffer` to the connected peer and returns the number of bytes written.
+    ///
+    /// Without `DONTWAIT` the call retries while the peer's receive buffer is full.
+    ///
+    /// # Errors
+    /// * `EOPNOTSUPP_OR_ENOTSUP` if `OOB` is requested.
+    /// * `EPIPE` if the peer has shut down.
+    /// * `ENOTCONN` if the socket is not connected.
+    /// * `EAGAIN_OR_EWOULDBLOCK` if `DONTWAIT` is set and the peer buffer is full.
+    /// * Any other error reported by the connection.
+    fn send(
+        &self,
+        buffer: &[u8],
+        flags: crate::net::socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        if flags.contains(MessageFlag::OOB) {
+            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+        }
+        if self.is_peer_shutdown()? {
+            return Err(SystemError::EPIPE);
+        }
+        let dont_wait = flags.contains(MessageFlag::DONTWAIT);
+        loop {
+            // Only shared access to our own state is needed; the lock is
+            // released again before the result is inspected.
+            let attempt = match &*self.inner.read() {
+                Inner::Connected(connected) => connected.try_write(buffer),
+                _ => {
+                    log::error!("the socket is not connected");
+                    return Err(SystemError::ENOTCONN);
+                }
+            };
+            match attempt {
+                Ok(len) => {
+                    log::debug!("send successfully");
+                    return Ok(len);
+                }
+                // The peer's receive buffer is full.
+                Err(SystemError::ENOBUFS) => {
+                    if dont_wait {
+                        return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+                    }
+                    // Stop retrying once the peer is gone; nobody will drain the buffer.
+                    if self.is_peer_shutdown()? {
+                        return Err(SystemError::EPIPE);
+                    }
+                    core::hint::spin_loop();
+                }
+                // Any other failure will not fix itself by retrying.
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    /// Message-based send is not implemented.
+    fn send_msg(
+        &self,
+        _msg: &crate::net::syscall::MsgHdr,
+        _flags: crate::net::socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Same as `send` without flags.
+    fn write(&self, buffer: &[u8]) -> Result<usize, SystemError> {
+        self.send(buffer, crate::net::socket::MessageFlag::empty())
+    }
+
+    /// Like `recv`, but also returns the endpoint of the connected peer. The
+    /// `_address` argument is ignored.
+    ///
+    /// # Errors
+    /// Same as `recv`, plus `ENOTCONN` if the connection has no peer endpoint.
+    fn recv_from(
+        &self,
+        buffer: &mut [u8],
+        flags: MessageFlag,
+        _address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        log::debug!("recvfrom flags {:?}", flags);
+        let (len, peer) = self.do_recv(buffer, flags)?;
+        let peer = peer.ok_or(SystemError::ENOTCONN)?;
+        Ok((len, peer))
+    }
+
+    /// Returns the (fixed, default) send buffer size.
+    fn send_buffer_size(&self) -> usize {
+        log::warn!("using default buffer size");
+        SeqpacketSocket::DEFAULT_BUF_SIZE
+    }
+
+    /// Returns the (fixed, default) receive buffer size.
+    fn recv_buffer_size(&self) -> usize {
+        log::warn!("using default buffer size");
+        SeqpacketSocket::DEFAULT_BUF_SIZE
+    }
+
+    /// Computes the epoll event mask describing the socket's current readiness.
+    ///
+    /// Modelled on Linux `unix_poll`:
+    /// https://code.dragonos.org.cn/xref/linux-6.1.9/net/unix/af_unix.c#3152
+    fn poll(&self) -> usize {
+        let mut mask = EP::empty();
+        let shutdown = self.shutdown.get();
+
+        // A connection counts as hung up when both of our directions, or both of
+        // the peer's, are shut down. Sockets without a peer (init / listening)
+        // make the peer lookup fail, which simply means "not hung up".
+        let peer_shutdown = self.is_peer_shutdown().unwrap_or(false);
+        if shutdown.is_both_shutdown() || peer_shutdown {
+            mask |= EP::EPOLLHUP;
+        }
+
+        if shutdown.is_recv_shutdown() {
+            mask |= EP::EPOLLRDHUP | EP::EPOLLIN | EP::EPOLLRDNORM;
+        }
+        match &*self.inner.read() {
+            Inner::Connected(connected) => {
+                if connected.can_recv() {
+                    mask |= EP::EPOLLIN | EP::EPOLLRDNORM;
+                }
+
+                // TODO: report urgent data (EPOLLPRI)
+                // Writable only while sending is still allowed and the peer has
+                // room; a full buffer or a failed peer lookup means "not writable".
+                if !shutdown.is_send_shutdown() && connected.can_send().unwrap_or(false) {
+                    mask |= EP::EPOLLOUT | EP::EPOLLWRNORM | EP::EPOLLWRBAND;
+                }
+            }
+            // A listening socket is readable only when a connection is pending.
+            Inner::Listen(listener) => {
+                if listener.is_acceptable() {
+                    mask |= EP::EPOLLIN;
+                }
+            }
+            Inner::Init(_) => mask |= EP::EPOLLOUT,
+        }
+        mask.bits() as usize
+    }
+}
+
+impl SeqpacketSocket {
+    /// Shared implementation of `recv` and `recv_from`.
+    ///
+    /// Returns the number of bytes read and the peer endpoint (if recorded).
+    /// Returns length 0 when the peer has shut down and no data is left.
+    fn do_recv(
+        &self,
+        buffer: &mut [u8],
+        flags: MessageFlag,
+    ) -> Result<(usize, Option<Endpoint>), SystemError> {
+        if flags.contains(MessageFlag::OOB) {
+            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+        }
+        let dont_wait = flags.contains(MessageFlag::DONTWAIT);
+        loop {
+            if !dont_wait {
+                wq_wait_event_interruptible!(
+                    self.wait_queue,
+                    self.can_recv()? || self.is_peer_shutdown()?,
+                    {}
+                )?;
+            }
+            // The exclusive lock keeps concurrent readers of this socket from
+            // racing between the emptiness check and the read.
+            match &*self.inner.write() {
+                Inner::Connected(connected) => {
+                    if connected.can_recv() {
+                        let len = connected.recv_slice(buffer)?;
+                        log::debug!("recv from successfully");
+                        return Ok((len, connected.peer_endpoint().cloned()));
+                    }
+                }
+                _ => {
+                    log::error!("the socket is not connected");
+                    return Err(SystemError::ENOTCONN);
+                }
+            }
+            // Nothing buffered: a closed peer means end of stream, not "retry".
+            if self.is_peer_shutdown()? {
+                return Ok((0, self.get_peer_name().ok()));
+            }
+            if dont_wait {
+                return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+            }
+        }
+    }
+}

+ 243 - 0
kernel/src/net/socket/unix/stream/inner.rs

@@ -0,0 +1,243 @@
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+use log::debug;
+use system_error::SystemError;
+
+use crate::libs::mutex::Mutex;
+use crate::net::socket::buffer::Buffer;
+use crate::net::socket::unix::stream::StreamSocket;
+use crate::net::socket::{Endpoint, Inode, ShutdownTemp};
+
+use alloc::collections::VecDeque;
+use alloc::{string::String, sync::Arc};
+
+/// State of a UNIX stream socket; the active variant decides which operations
+/// the socket accepts.
+#[derive(Debug)]
+pub enum Inner {
+    /// Newly created socket that is neither listening nor connected.
+    Init(Init),
+    /// Socket with an established connection to a peer.
+    Connected(Connected),
+    /// Socket that queues incoming connections for `accept`.
+    Listener(Listener),
+}
+
+/// State of a stream socket that is neither listening nor connected.
+#[derive(Debug)]
+pub struct Init {
+    /// Endpoint this socket is bound to, if any. Only `Endpoint::Inode` values
+    /// are ever stored here (enforced by `bind`).
+    addr: Option<Endpoint>,
+}
+
+impl Init {
+    /// Creates an unbound state.
+    pub(super) fn new() -> Self {
+        Self { addr: None }
+    }
+
+    /// Binds the socket to `endpoint_to_bind`.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket is already bound or the endpoint is not an
+    /// `Endpoint::Inode`.
+    pub(super) fn bind(&mut self, endpoint_to_bind: Endpoint) -> Result<(), SystemError> {
+        if self.addr.is_some() {
+            log::error!("the socket is already bound");
+            return Err(SystemError::EINVAL);
+        }
+
+        match endpoint_to_bind {
+            Endpoint::Inode(_) => {
+                self.addr = Some(endpoint_to_bind);
+                Ok(())
+            }
+            _ => Err(SystemError::EINVAL),
+        }
+    }
+
+    /// Replaces the path of the bound endpoint with `sun_path` and returns a
+    /// copy of the updated endpoint.
+    ///
+    /// # Errors
+    /// `EINVAL` if the socket has not been bound to an inode endpoint yet. The
+    /// stored endpoint is left untouched in that case.
+    pub fn bind_path(&mut self, sun_path: String) -> Result<Endpoint, SystemError> {
+        match self.addr.take() {
+            Some(Endpoint::Inode((inode, _))) => {
+                let epoint = Endpoint::Inode((inode, sun_path));
+                self.addr = Some(epoint.clone());
+                Ok(epoint)
+            }
+            None => {
+                log::error!("the socket is not bound");
+                Err(SystemError::EINVAL)
+            }
+            other => {
+                // Put the unexpected endpoint back so a failed call does not
+                // silently unbind the socket.
+                self.addr = other;
+                Err(SystemError::EINVAL)
+            }
+        }
+    }
+
+    /// Returns the endpoint the socket is bound to, if any.
+    pub(super) fn endpoint(&self) -> Option<&Endpoint> {
+        self.addr.as_ref()
+    }
+}
+
+/// State of a stream socket that is connected to a peer.
+#[derive(Debug, Clone)]
+pub struct Connected {
+    /// Endpoint of this side of the connection.
+    addr: Option<Endpoint>,
+    /// Endpoint of the other side of the connection.
+    peer_addr: Option<Endpoint>,
+    /// Buffer holding data that the peer has sent and this side has not read yet.
+    buffer: Arc<Buffer>,
+}
+
+impl Connected {
+    /// Creates both halves of a connection: the first half is owned by `addr`
+    /// and the second by `peer_addr`. Each half gets its own receive buffer.
+    pub fn new_pair(addr: Option<Endpoint>, peer_addr: Option<Endpoint>) -> (Self, Self) {
+        let this = Connected {
+            addr: addr.clone(),
+            peer_addr: peer_addr.clone(),
+            buffer: Buffer::new(),
+        };
+        let peer = Connected {
+            addr: peer_addr,
+            peer_addr: addr,
+            buffer: Buffer::new(),
+        };
+
+        (this, peer)
+    }
+
+    /// Returns the local endpoint, if known.
+    pub fn endpoint(&self) -> Option<&Endpoint> {
+        self.addr.as_ref()
+    }
+
+    /// Replaces the local endpoint.
+    pub fn set_addr(&mut self, addr: Option<Endpoint>) {
+        self.addr = addr;
+    }
+
+    /// Returns the peer endpoint, if known.
+    pub fn peer_endpoint(&self) -> Option<&Endpoint> {
+        self.peer_addr.as_ref()
+    }
+
+    /// Replaces the peer endpoint.
+    pub fn set_peer_addr(&mut self, peer: Option<Endpoint>) {
+        self.peer_addr = peer;
+    }
+
+    /// Resolves the peer endpoint to the peer's socket object.
+    ///
+    /// # Errors
+    /// `ENOTCONN` if no peer endpoint is recorded, `EINVAL` if the endpoint is
+    /// not an inode wrapping a `StreamSocket`.
+    fn peer_socket(&self) -> Result<Arc<StreamSocket>, SystemError> {
+        let peer_inode = match self.peer_addr.as_ref() {
+            Some(Endpoint::Inode((inode, _))) => inode,
+            Some(_) => return Err(SystemError::EINVAL),
+            None => return Err(SystemError::ENOTCONN),
+        };
+        Arc::downcast::<StreamSocket>(peer_inode.inner()).map_err(|_| SystemError::EINVAL)
+    }
+
+    /// Writes `buf` into the peer's receive buffer and wakes tasks waiting on
+    /// the peer socket.
+    ///
+    /// # Errors
+    /// `ENOTCONN` / `EINVAL` if the peer cannot be resolved, `EINVAL` if the
+    /// peer socket is not connected, or any error from the buffer write.
+    pub fn send_slice(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        let peer_socket = self.peer_socket()?;
+        let written = match &*peer_socket.inner.read() {
+            Inner::Connected(connected) => connected.buffer.write_read_buffer(buf)?,
+            _ => {
+                debug!("no! is not connested!");
+                return Err(SystemError::EINVAL);
+            }
+        };
+        peer_socket.wait_queue.wakeup(None);
+        Ok(written)
+    }
+
+    /// Returns `true` if the peer's receive buffer is not full.
+    ///
+    /// # Errors
+    /// `ENOTCONN` / `EINVAL` if the peer cannot be resolved, `EINVAL` if the
+    /// peer socket is not in the connected state.
+    pub fn can_send(&self) -> Result<bool, SystemError> {
+        let peer_socket = self.peer_socket()?;
+        let is_full = match &*peer_socket.inner.read() {
+            Inner::Connected(connected) => connected.buffer.is_read_buf_full(),
+            _ => return Err(SystemError::EINVAL),
+        };
+        debug!("can send? :{}", !is_full);
+        Ok(!is_full)
+    }
+
+    /// Returns `true` if there is data waiting to be read.
+    pub fn can_recv(&self) -> bool {
+        !self.buffer.is_read_buf_empty()
+    }
+
+    /// Sends `buf` to the peer without waiting.
+    ///
+    /// # Errors
+    /// `ENOBUFS` if the peer's receive buffer is full, or any error reported by
+    /// `can_send` / `send_slice`.
+    pub fn try_send(&self, buf: &[u8]) -> Result<usize, SystemError> {
+        if self.can_send()? {
+            self.send_slice(buf)
+        } else {
+            Err(SystemError::ENOBUFS)
+        }
+    }
+
+    /// Copies data from this side's receive buffer into `buf`.
+    fn recv_slice(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        self.buffer.read_read_buffer(buf)
+    }
+
+    /// Reads pending data into `buf` without waiting.
+    ///
+    /// # Errors
+    /// `EAGAIN_OR_EWOULDBLOCK` if the receive buffer is empty.
+    pub fn try_recv(&self, buf: &mut [u8]) -> Result<usize, SystemError> {
+        if self.can_recv() {
+            self.recv_slice(buf)
+        } else {
+            // An empty buffer is a "try again" condition, not an invalid argument.
+            Err(SystemError::EAGAIN_OR_EWOULDBLOCK)
+        }
+    }
+
+    /// Shuts down part of the connection.
+    ///
+    /// Shutting down either direction is not implemented yet.
+    ///
+    /// # Errors
+    /// `EINVAL` if `how` selects nothing, `ENOSYS` if it requests a send or
+    /// receive shutdown (reported as an error instead of panicking the kernel).
+    pub fn shutdown(&self, how: ShutdownTemp) -> Result<(), SystemError> {
+        if how.is_empty() {
+            return Err(SystemError::EINVAL);
+        }
+        if how.is_send_shutdown() || how.is_recv_shutdown() {
+            return Err(SystemError::ENOSYS);
+        }
+
+        Ok(())
+    }
+}
+
+/// State of a stream socket that accepts incoming connections.
+#[derive(Debug)]
+pub struct Listener {
+    /// Endpoint this listener is bound to, if any.
+    addr: Option<Endpoint>,
+    /// Inodes of connections that were queued by `push_incoming` and have not
+    /// been taken out yet.
+    incoming_connects: Mutex<VecDeque<Arc<Inode>>>,
+    /// Maximum number of connections that may wait in `incoming_connects`.
+    backlog: AtomicUsize,
+}
+
+impl Listener {
+    /// Creates a listener bound to `addr` whose queue holds at most `backlog`
+    /// pending connections.
+    pub fn new(addr: Option<Endpoint>, backlog: usize) -> Self {
+        Self {
+            addr,
+            incoming_connects: Mutex::new(VecDeque::new()),
+            backlog: AtomicUsize::new(backlog),
+        }
+    }
+
+    /// Updates the backlog of an already listening socket.
+    pub fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        self.backlog.store(backlog, Ordering::Relaxed);
+        Ok(())
+    }
+
+    /// Queues `server_inode` as a pending connection.
+    ///
+    /// # Errors
+    /// `EAGAIN_OR_EWOULDBLOCK` if the queue already holds `backlog` entries.
+    pub fn push_incoming(&self, server_inode: Arc<Inode>) -> Result<(), SystemError> {
+        let mut incoming_connects = self.incoming_connects.lock();
+
+        if incoming_connects.len() >= self.backlog.load(Ordering::Relaxed) {
+            debug!("unix stream listen socket connected queue is full!");
+            return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+        }
+
+        incoming_connects.push_back(server_inode);
+
+        Ok(())
+    }
+
+    /// Removes and returns the oldest pending connection, if any.
+    pub fn pop_incoming(&self) -> Option<Arc<Inode>> {
+        self.incoming_connects.lock().pop_front()
+    }
+
+    /// Returns the endpoint this listener is bound to, if any.
+    pub(super) fn endpoint(&self) -> Option<&Endpoint> {
+        self.addr.as_ref()
+    }
+
+    /// Returns `true` if at least one connection is waiting to be accepted.
+    pub(super) fn is_acceptable(&self) -> bool {
+        !self.incoming_connects.lock().is_empty()
+    }
+
+    /// Removes the oldest pending connection and returns its inode together
+    /// with the endpoint of the connecting peer.
+    ///
+    /// # Errors
+    /// * `EAGAIN_OR_EWOULDBLOCK` if no connection is pending.
+    /// * `EINVAL` if the queued inode does not wrap a `StreamSocket`.
+    /// * `ENOTCONN` if the queued socket is not connected or has no peer endpoint.
+    pub(super) fn try_accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        let mut incoming_connecteds = self.incoming_connects.lock();
+        debug!("incom len {}", incoming_connecteds.len());
+        let connected = incoming_connecteds
+            .pop_front()
+            .ok_or(SystemError::EAGAIN_OR_EWOULDBLOCK)?;
+        let socket =
+            Arc::downcast::<StreamSocket>(connected.inner()).map_err(|_| SystemError::EINVAL)?;
+        let peer = match &*socket.inner.read() {
+            // The peer endpoint is optional; report its absence as an error
+            // instead of panicking.
+            Inner::Connected(connected) => connected
+                .peer_endpoint()
+                .cloned()
+                .ok_or(SystemError::ENOTCONN)?,
+            _ => return Err(SystemError::ENOTCONN),
+        };
+        debug!("server accept!");
+        Ok((Inode::new(socket), peer))
+    }
+}

+ 478 - 0
kernel/src/net/socket/unix/stream/mod.rs

@@ -0,0 +1,478 @@
+use crate::sched::SchedMode;
+use alloc::{
+    string::String,
+    sync::{Arc, Weak},
+};
+use inner::{Connected, Init, Inner, Listener};
+use log::debug;
+use system_error::SystemError;
+use unix::INODE_MAP;
+
+use crate::{
+    libs::rwlock::RwLock,
+    net::socket::{self, *},
+};
+
+type EP = EPollEventType;
+
+pub mod inner;
+
+/// A unix-domain stream socket.
+#[derive(Debug)]
+pub struct StreamSocket {
+    /// Current state of the socket: `Inner::Init`, `Inner::Listener` or `Inner::Connected`.
+    inner: RwLock<Inner>,
+    /// Shutdown state of this socket; `close` marks both directions as shut down.
+    shutdown: Shutdown,
+    /// NOTE(review): not read anywhere in this file; presumably reserved for epoll
+    /// registration — confirm.
+    _epitems: EPollItems,
+    /// Queue on which `accept` and `recv` sleep; `connect` wakes the listener's queue.
+    wait_queue: WaitQueue,
+    /// Weak reference to the `Arc` that owns this socket, set up by `Arc::new_cyclic`.
+    self_ref: Weak<Self>,
+}
+
+impl StreamSocket {
+    /// Default size of the metadata buffer.
+    pub const DEFAULT_METADATA_BUF_SIZE: usize = 1024;
+    /// Default buffer size; reported by `send_buffer_size` and `recv_buffer_size`.
+    pub const DEFAULT_BUF_SIZE: usize = 64 * 1024;
+
+    /// Creates a socket in the `Inner::Init` state.
+    pub fn new() -> Arc<Self> {
+        Arc::new_cyclic(|weak_self| {
+            let initial_state = Inner::Init(Init::new());
+            Self {
+                inner: RwLock::new(initial_state),
+                shutdown: Shutdown::new(),
+                _epitems: EPollItems::default(),
+                wait_queue: WaitQueue::default(),
+                self_ref: weak_self.clone(),
+            }
+        })
+    }
+
+    /// Creates two stream sockets that are already connected to each other and returns
+    /// their inodes.
+    ///
+    /// Each side's endpoint is its own inode paired with an empty path.
+    pub fn new_pairs() -> Result<(Arc<Inode>, Arc<Inode>), SystemError> {
+        let first = StreamSocket::new();
+        let second = StreamSocket::new();
+        let first_inode = Inode::new(first.clone());
+        let second_inode = Inode::new(second.clone());
+
+        let first_addr = Endpoint::Inode((first_inode.clone(), String::from("")));
+        let second_addr = Endpoint::Inode((second_inode.clone(), String::from("")));
+        let (first_conn, second_conn) = Connected::new_pair(Some(first_addr), Some(second_addr));
+
+        *first.inner.write() = Inner::Connected(first_conn);
+        *second.inner.write() = Inner::Connected(second_conn);
+
+        Ok((first_inode, second_inode))
+    }
+
+    /// Creates a socket that starts out in the `Inner::Connected` state, wrapping `connected`.
+    pub fn new_connected(connected: Connected) -> Arc<Self> {
+        Arc::new_cyclic(|weak_self| {
+            let initial_state = Inner::Connected(connected);
+            Self {
+                inner: RwLock::new(initial_state),
+                shutdown: Shutdown::new(),
+                _epitems: EPollItems::default(),
+                wait_queue: WaitQueue::default(),
+                self_ref: weak_self.clone(),
+            }
+        })
+    }
+
+    /// Creates a new socket, wraps it in an inode and binds the socket to an endpoint
+    /// made of that inode and an empty path.
+    ///
+    /// The result of the bind call itself is discarded.
+    ///
+    /// # Errors
+    /// Returns `EINVAL` if the fresh socket is not in the `Init` state.
+    pub fn new_inode() -> Result<Arc<Inode>, SystemError> {
+        let socket = StreamSocket::new();
+        let inode = Inode::new(socket.clone());
+
+        {
+            let mut state = socket.inner.write();
+            if let Inner::Init(init) = &mut *state {
+                let _ = init.bind(Endpoint::Inode((inode.clone(), String::from(""))));
+            } else {
+                return Err(SystemError::EINVAL);
+            }
+        }
+
+        Ok(inode)
+    }
+
+    /// Returns `true` when this socket is listening and has a connection waiting to be
+    /// accepted.
+    ///
+    /// A socket that is not listening has nothing to accept, so `false` is returned
+    /// for it. This is used as the wait condition of `accept`, which user space can
+    /// call on any socket, so it must not panic.
+    fn is_acceptable(&self) -> bool {
+        match &*self.inner.read() {
+            Inner::Listener(listener) => listener.is_acceptable(),
+            _ => false,
+        }
+    }
+
+    /// Tries to take one pending connection from the listener without waiting.
+    ///
+    /// # Errors
+    /// Returns `EINVAL` if the socket is not listening; otherwise forwards the result
+    /// of `Listener::try_accept`.
+    pub fn try_accept(&self) -> Result<(Arc<Inode>, Endpoint), SystemError> {
+        let state = self.inner.read();
+        if let Inner::Listener(listener) = &*state {
+            return listener.try_accept();
+        }
+        log::error!("the socket is not listening");
+        Err(SystemError::EINVAL)
+    }
+
+    /// Reports whether the peer socket has both directions shut down.
+    ///
+    /// # Errors
+    /// - Whatever `get_peer_name` returns (e.g. `ENOTCONN` when not connected).
+    /// - `EINVAL` if the peer endpoint is not an inode endpoint or does not wrap a
+    ///   `StreamSocket`.
+    fn is_peer_shutdown(&self) -> Result<bool, SystemError> {
+        let peer_inode = match self.get_peer_name()? {
+            Endpoint::Inode((inode, _)) => inode,
+            _ => return Err(SystemError::EINVAL),
+        };
+        let peer_socket =
+            Arc::downcast::<StreamSocket>(peer_inode.inner()).map_err(|_| SystemError::EINVAL)?;
+        Ok(peer_socket.shutdown.get().is_both_shutdown())
+    }
+
+    /// Asks the connection whether data can currently be received.
+    ///
+    /// # Errors
+    /// Returns `ENOTCONN` if the socket is not connected.
+    fn can_recv(&self) -> Result<bool, SystemError> {
+        match &*self.inner.read() {
+            Inner::Connected(connected) => Ok(connected.can_recv()),
+            _ => Err(SystemError::ENOTCONN),
+        }
+    }
+}
+
+impl Socket for StreamSocket {
+    /// Connects this socket to the listening socket designated by `server_endpoint`.
+    ///
+    /// A new server-side socket is created, paired with this one and pushed onto the
+    /// listener's pending queue; the listener's wait queue is then woken up.
+    ///
+    /// # Errors
+    /// - `EISCONN` if this socket is already connected.
+    /// - `EINVAL` if this socket is listening, the endpoint kind is unsupported, the
+    ///   target does not wrap a `StreamSocket`, or the target is not listening.
+    /// - `ECONNREFUSED` if no socket is registered in `INODE_MAP` for the given path.
+    /// - Any error from `Listener::push_incoming` (e.g. the backlog is full).
+    fn connect(&self, server_endpoint: Endpoint) -> Result<(), SystemError> {
+        // Determine the client address, binding to our own inode if not bound yet.
+        let client_endpoint = match &mut *self.inner.write() {
+            Inner::Init(init) => match init.endpoint().cloned() {
+                Some(endpoint) => {
+                    debug!("bind when connected");
+                    Some(endpoint)
+                }
+                None => {
+                    debug!("not bind when connected");
+                    // `self` is alive, so the weak self-reference can always be upgraded.
+                    let inode = Inode::new(self.self_ref.upgrade().unwrap());
+                    let epoint = Endpoint::Inode((inode.clone(), String::from("")));
+                    let _ = init.bind(epoint.clone());
+                    Some(epoint)
+                }
+            },
+            Inner::Connected(_) => return Err(SystemError::EISCONN),
+            Inner::Listener(_) => return Err(SystemError::EINVAL),
+        };
+
+        // Locate the peer (server) socket.
+        let (peer_inode, sun_path) = match server_endpoint {
+            Endpoint::Inode((inode, path)) => (inode, path),
+            Endpoint::Unixpath((inode_id, path)) => {
+                let inode_guard = INODE_MAP.read_irqsave();
+                match inode_guard.get(&inode_id) {
+                    Some(Endpoint::Inode((inode, _))) => (inode.clone(), path),
+                    Some(_) => return Err(SystemError::EINVAL),
+                    // Nothing is bound to this path, so nobody can accept the connection.
+                    // This used to panic the kernel on a user-supplied address.
+                    None => return Err(SystemError::ECONNREFUSED),
+                }
+            }
+            _ => return Err(SystemError::EINVAL),
+        };
+
+        let remote_socket: Arc<StreamSocket> =
+            Arc::downcast::<StreamSocket>(peer_inode.inner()).map_err(|_| SystemError::EINVAL)?;
+
+        // Create the new server-side socket for this connection.
+        let new_server_socket = StreamSocket::new();
+        let new_server_inode = Inode::new(new_server_socket.clone());
+        let new_server_endpoint = Some(Endpoint::Inode((new_server_inode.clone(), sun_path)));
+        // Build the connected pair.
+        let (client_conn, server_conn) =
+            Connected::new_pair(client_endpoint, new_server_endpoint.clone());
+        *new_server_socket.inner.write() = Inner::Connected(server_conn);
+
+        // Check that the remote socket is listening.
+        let remote_listener = remote_socket.inner.write();
+        match &*remote_listener {
+            Inner::Listener(listener) => {
+                // Queue the new connection on the server socket, then wake up acceptors.
+                listener.push_incoming(new_server_inode)?;
+                *self.inner.write() = Inner::Connected(client_conn);
+                remote_socket.wait_queue.wakeup(None);
+            }
+            _ => return Err(SystemError::EINVAL),
+        }
+
+        Ok(())
+    }
+
+    /// Binds the socket to a unix path and registers the result in `INODE_MAP` under
+    /// the endpoint's inode id.
+    ///
+    /// # Errors
+    /// - `EINVAL` if `endpoint` is not `Endpoint::Unixpath`, or if the socket is no
+    ///   longer in the `Init` state.
+    /// - Any error from `Init::bind_path`.
+    fn bind(&self, endpoint: Endpoint) -> Result<(), SystemError> {
+        let (inodeid, path) = match endpoint {
+            Endpoint::Unixpath(id_and_path) => id_and_path,
+            _ => return Err(SystemError::EINVAL),
+        };
+
+        let bound = match &mut *self.inner.write() {
+            Inner::Init(init) => init.bind_path(path)?,
+            _ => {
+                log::error!("socket has listen or connected");
+                return Err(SystemError::EINVAL);
+            }
+        };
+
+        INODE_MAP.write_irqsave().insert(inodeid, bound);
+        Ok(())
+    }
+
+    /// Partial shutdown is not implemented for unix stream sockets yet.
+    ///
+    /// Returns `ENOSYS`, like the other unimplemented operations on this socket,
+    /// instead of panicking the kernel when user space calls `shutdown(2)`.
+    fn shutdown(&self, _stype: ShutdownTemp) -> Result<(), SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Switches a bound socket to the listening state, or updates the backlog of a
+    /// socket that is already listening.
+    ///
+    /// # Errors
+    /// Returns `EINVAL` if the socket is connected or has no bound endpoint.
+    fn listen(&self, backlog: usize) -> Result<(), SystemError> {
+        let mut state = self.inner.write();
+        let local_endpoint = match &*state {
+            Inner::Listener(listener) => return listener.listen(backlog),
+            Inner::Connected(_) => return Err(SystemError::EINVAL),
+            Inner::Init(init) => init.endpoint().ok_or(SystemError::EINVAL)?.clone(),
+        };
+
+        *state = Inner::Listener(Listener::new(Some(local_endpoint), backlog));
+        Ok(())
+    }
+
+    /// Waits for an incoming connection and returns its inode and the peer endpoint.
+    ///
+    /// Only the blocking variant is implemented for now.
+    ///
+    /// # Errors
+    /// - `EINVAL` if the socket is not listening.
+    /// - Any error returned by the interruptible wait.
+    /// - Any error from `try_accept` other than `EAGAIN_OR_EWOULDBLOCK`.
+    fn accept(&self) -> Result<(Arc<socket::Inode>, Endpoint), SystemError> {
+        debug!("stream server begin accept");
+        // Reject non-listening sockets before sleeping: they will never have a pending
+        // connection, and user space can call accept() on any socket.
+        if !matches!(&*self.inner.read(), Inner::Listener(_)) {
+            log::error!("the socket is not listening");
+            return Err(SystemError::EINVAL);
+        }
+
+        loop {
+            wq_wait_event_interruptible!(self.wait_queue, self.is_acceptable(), {})?;
+            match self.try_accept() {
+                Ok((socket, endpoint)) => {
+                    debug!("server accept!:{:?}", endpoint);
+                    return Ok((socket, endpoint));
+                }
+                // Someone else took the pending connection first; wait again.
+                Err(SystemError::EAGAIN_OR_EWOULDBLOCK) => continue,
+                // Retrying cannot repair any other failure, so report it to the caller.
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    /// Socket options are not implemented: logs a warning, ignores every argument and
+    /// reports success.
+    fn set_option(
+        &self,
+        _level: OptionsLevel,
+        _optname: usize,
+        _optval: &[u8],
+    ) -> Result<(), SystemError> {
+        log::warn!("setsockopt is not implemented");
+        Ok(())
+    }
+
+    /// Borrows the wait queue of this socket.
+    fn wait_queue(&self) -> &WaitQueue {
+        &self.wait_queue
+    }
+
+    /// Computes the epoll event mask describing the current readiness of the socket.
+    ///
+    /// Modeled on Linux's `unix_poll`:
+    /// https://code.dragonos.org.cn/xref/linux-6.1.9/net/unix/af_unix.c#3152
+    ///
+    /// Never panics: a socket without a peer counts as "peer not shut down", and a
+    /// connection that cannot send right now simply does not report writability.
+    fn poll(&self) -> usize {
+        let mut mask = EP::empty();
+        let shutdown = self.shutdown.get();
+
+        // Both directions being shut down (locally or on the peer) means "disconnected".
+        // `is_peer_shutdown` fails for sockets that are not connected (e.g. a listener
+        // being polled for incoming connections); that must not panic.
+        if shutdown.is_both_shutdown() || self.is_peer_shutdown().unwrap_or(false) {
+            mask |= EP::EPOLLHUP;
+        }
+
+        if shutdown.is_recv_shutdown() {
+            mask |= EP::EPOLLRDHUP | EP::EPOLLIN | EP::EPOLLRDNORM;
+        }
+        match &*self.inner.read() {
+            Inner::Connected(connected) => {
+                if connected.can_recv() {
+                    mask |= EP::EPOLLIN | EP::EPOLLRDNORM;
+                }
+
+                // TODO: handle urgent data (EPOLLPRI)
+                // TODO: handle whether the connection is closed (EPOLLHUP)
+                // Without buffer space the socket is just not writable yet.
+                if !shutdown.is_send_shutdown() && connected.can_send().unwrap_or(false) {
+                    mask |= EP::EPOLLOUT | EP::EPOLLWRNORM | EP::EPOLLWRBAND;
+                }
+            }
+            Inner::Listener(_) => mask |= EP::EPOLLIN,
+            Inner::Init(_) => mask |= EP::EPOLLOUT,
+        }
+        mask.bits() as usize
+    }
+
+    /// Marks both the receive and the send direction of this socket as shut down.
+    ///
+    /// Always returns `Ok(())`.
+    fn close(&self) -> Result<(), SystemError> {
+        self.shutdown.recv_shutdown();
+        self.shutdown.send_shutdown();
+        Ok(())
+    }
+
+    /// Returns the endpoint of the peer this socket is connected to.
+    ///
+    /// # Errors
+    /// - `ENOTCONN` if the socket is not connected.
+    /// - `EAGAIN_OR_EWOULDBLOCK` if the connection has no peer endpoint recorded.
+    fn get_peer_name(&self) -> Result<Endpoint, SystemError> {
+        let peer = match &*self.inner.read() {
+            Inner::Connected(connected) => connected.peer_endpoint().cloned(),
+            _ => return Err(SystemError::ENOTCONN),
+        };
+
+        peer.ok_or(SystemError::EAGAIN_OR_EWOULDBLOCK)
+    }
+
+    /// Returns the local endpoint of this socket, whatever state it is in.
+    ///
+    /// # Errors
+    /// Returns `EAGAIN_OR_EWOULDBLOCK` if no local endpoint is recorded.
+    fn get_name(&self) -> Result<Endpoint, SystemError> {
+        let local = {
+            let state = self.inner.read();
+            match &*state {
+                Inner::Init(init) => init.endpoint().cloned(),
+                Inner::Connected(connected) => connected.endpoint().cloned(),
+                Inner::Listener(listener) => listener.endpoint().cloned(),
+            }
+        };
+
+        match local {
+            Some(endpoint) => Ok(endpoint),
+            None => Err(SystemError::EAGAIN_OR_EWOULDBLOCK),
+        }
+    }
+
+    /// Socket options are not implemented: logs a warning, leaves `_value` untouched
+    /// and returns `Ok(0)`.
+    fn get_option(
+        &self,
+        _level: OptionsLevel,
+        _name: usize,
+        _value: &mut [u8],
+    ) -> Result<usize, SystemError> {
+        log::warn!("getsockopt is not implemented");
+        Ok(0)
+    }
+
+    /// Reads from the socket; equivalent to `recv` with no message flags set.
+    fn read(&self, buffer: &mut [u8]) -> Result<usize, SystemError> {
+        let no_flags = socket::MessageFlag::empty();
+        self.recv(buffer, no_flags)
+    }
+
+    /// Receives bytes from the connected peer into `buffer`.
+    ///
+    /// Without `MessageFlag::DONTWAIT` the call sleeps on the wait queue until data
+    /// can be received or the peer has shut down. With `DONTWAIT` exactly one attempt
+    /// is made.
+    ///
+    /// # Returns
+    /// The number of bytes received, or `Ok(0)` (end of stream) once the peer has shut
+    /// down and nothing is left to read.
+    ///
+    /// # Errors
+    /// - `ENOTCONN` if the socket is not connected.
+    /// - `EAGAIN_OR_EWOULDBLOCK` with `DONTWAIT` when nothing could be received.
+    /// - Any error returned by the interruptible wait or by the peer lookup.
+    fn recv(&self, buffer: &mut [u8], flags: socket::MessageFlag) -> Result<usize, SystemError> {
+        let nonblocking = flags.contains(MessageFlag::DONTWAIT);
+        loop {
+            if !nonblocking {
+                log::debug!("socket try recv");
+                wq_wait_event_interruptible!(
+                    self.wait_queue,
+                    self.can_recv()? || self.is_peer_shutdown()?,
+                    {}
+                )?;
+            }
+
+            // The state lock is released at the end of this statement. The peer checks
+            // below take the same lock, so they must run after it is dropped.
+            let received = match &*self.inner.write() {
+                Inner::Connected(connected) => connected.try_recv(buffer).ok(),
+                _ => {
+                    log::error!("the socket is not connected");
+                    return Err(SystemError::ENOTCONN);
+                }
+            };
+
+            if let Some(len) = received {
+                log::debug!("recv successfully");
+                return Ok(len);
+            }
+
+            // Nothing was received. If the peer is gone and the buffer is drained this
+            // is end of stream; retrying would wake up immediately forever. Shutdown is
+            // checked first so data written just before the close is not missed.
+            if self.is_peer_shutdown()? && !self.can_recv()? {
+                return Ok(0);
+            }
+
+            if nonblocking {
+                return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+            }
+        }
+    }
+
+    /// Receives bytes from the connected peer into `buffer` and also returns the
+    /// peer's endpoint. `_address` is ignored.
+    ///
+    /// Without `MessageFlag::DONTWAIT` the call sleeps on the wait queue until data
+    /// can be received or the peer has shut down. With `DONTWAIT` exactly one attempt
+    /// is made.
+    ///
+    /// # Returns
+    /// The number of bytes received (`0` once the peer has shut down and nothing is
+    /// left to read) together with the peer endpoint.
+    ///
+    /// # Errors
+    /// - `EOPNOTSUPP_OR_ENOTSUP` if `MessageFlag::OOB` is requested.
+    /// - `ENOTCONN` if the socket is not connected or has no peer endpoint recorded.
+    /// - `EAGAIN_OR_EWOULDBLOCK` with `DONTWAIT` when nothing could be received.
+    /// - Any error returned by the interruptible wait or by the peer lookup.
+    fn recv_from(
+        &self,
+        buffer: &mut [u8],
+        flags: socket::MessageFlag,
+        _address: Option<Endpoint>,
+    ) -> Result<(usize, Endpoint), SystemError> {
+        if flags.contains(MessageFlag::OOB) {
+            return Err(SystemError::EOPNOTSUPP_OR_ENOTSUP);
+        }
+
+        let nonblocking = flags.contains(MessageFlag::DONTWAIT);
+        loop {
+            if !nonblocking {
+                log::debug!("socket try recv from");
+
+                wq_wait_event_interruptible!(
+                    self.wait_queue,
+                    self.can_recv()? || self.is_peer_shutdown()?,
+                    {}
+                )?;
+            }
+            log::debug!("try recv");
+
+            // The state lock is released at the end of this statement. The peer checks
+            // below take the same lock, so they must run after it is dropped.
+            let (received, peer) = match &*self.inner.write() {
+                Inner::Connected(connected) => {
+                    // Resolve the peer before consuming data, so a missing endpoint
+                    // neither panics nor loses bytes that were already read.
+                    let peer = connected
+                        .peer_endpoint()
+                        .cloned()
+                        .ok_or(SystemError::ENOTCONN)?;
+                    (connected.try_recv(buffer).ok(), peer)
+                }
+                _ => {
+                    log::error!("the socket is not connected");
+                    return Err(SystemError::ENOTCONN);
+                }
+            };
+
+            if let Some(len) = received {
+                log::debug!("recvs from successfully");
+                return Ok((len, peer));
+            }
+
+            // Nothing was received. If the peer is gone and the buffer is drained this
+            // is end of stream; retrying would wake up immediately forever. Shutdown is
+            // checked first so data written just before the close is not missed.
+            if self.is_peer_shutdown()? && !self.can_recv()? {
+                return Ok((0, peer));
+            }
+
+            if nonblocking {
+                return Err(SystemError::EAGAIN_OR_EWOULDBLOCK);
+            }
+        }
+    }
+
+    /// Not implemented for unix stream sockets: always returns `ENOSYS`.
+    fn recv_msg(
+        &self,
+        _msg: &mut crate::net::syscall::MsgHdr,
+        _flags: socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Sends `buffer` to the connected peer.
+    ///
+    /// Without `MessageFlag::DONTWAIT` the send is retried until it succeeds or the
+    /// peer shuts down. With `DONTWAIT` exactly one attempt is made.
+    ///
+    /// # Errors
+    /// - `EPIPE` if the peer has shut down.
+    /// - `ENOTCONN` if the socket is not connected.
+    /// - `EAGAIN_OR_EWOULDBLOCK` with `DONTWAIT` when the data could not be sent.
+    /// - Any error from the peer lookup.
+    fn send(&self, buffer: &[u8], flags: socket::MessageFlag) -> Result<usize, SystemError> {
+        let nonblocking = flags.contains(MessageFlag::DONTWAIT);
+        loop {
+            // Checked on every attempt so a sender that keeps retrying notices the peer
+            // going away. It runs before the state lock is taken, because the lookup
+            // takes that lock itself.
+            if self.is_peer_shutdown()? {
+                return Err(SystemError::EPIPE);
+            }
+
+            match &*self.inner.write() {
+                Inner::Connected(connected) => match connected.try_send(buffer) {
+                    Ok(usize) => {
+                        log::debug!("send successfully");
+                        return Ok(usize);
+                    }
+                    Err(_) if nonblocking => return Err(SystemError::EAGAIN_OR_EWOULDBLOCK),
+                    Err(_) => continue,
+                },
+                _ => {
+                    log::error!("the socket is not connected");
+                    return Err(SystemError::ENOTCONN);
+                }
+            }
+        }
+    }
+
+    /// Not implemented for unix stream sockets: always returns `ENOSYS`.
+    ///
+    /// This matches `recv_msg` and `send_to`, and avoids a kernel panic when user
+    /// space calls `sendmsg(2)` on this socket.
+    fn send_msg(
+        &self,
+        _msg: &crate::net::syscall::MsgHdr,
+        _flags: socket::MessageFlag,
+    ) -> Result<usize, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Not implemented for unix stream sockets: always returns `ENOSYS`.
+    fn send_to(
+        &self,
+        _buffer: &[u8],
+        _flags: socket::MessageFlag,
+        _address: Endpoint,
+    ) -> Result<usize, SystemError> {
+        Err(SystemError::ENOSYS)
+    }
+
+    /// Writes to the socket; equivalent to `send` with no message flags set.
+    fn write(&self, buffer: &[u8]) -> Result<usize, SystemError> {
+        let no_flags = socket::MessageFlag::empty();
+        self.send(buffer, no_flags)
+    }
+
+    /// Reports the send buffer size: logs a warning and returns `DEFAULT_BUF_SIZE`.
+    fn send_buffer_size(&self) -> usize {
+        log::warn!("using default buffer size");
+        StreamSocket::DEFAULT_BUF_SIZE
+    }
+
+    /// Reports the receive buffer size: logs a warning and returns `DEFAULT_BUF_SIZE`.
+    fn recv_buffer_size(&self) -> usize {
+        log::warn!("using default buffer size");
+        StreamSocket::DEFAULT_BUF_SIZE
+    }
+}

+ 28 - 0
kernel/src/net/socket/utils.rs

@@ -0,0 +1,28 @@
+use crate::net::socket;
+use alloc::sync::Arc;
+use socket::Family;
+use system_error::SystemError;
+
+/// Creates a socket inode for the given address family, socket type and protocol.
+///
+/// `is_nonblock` and `is_close_on_exec` are accepted but not applied to the inode yet.
+///
+/// # Errors
+/// - `EAFNOSUPPORT` for address families without an implementation (including
+///   `AF_INET6`). The family comes straight from user space via `socket(2)`, so an
+///   unsupported value must be an error rather than a kernel panic.
+/// - Any error from the family-specific constructor.
+pub fn create_socket(
+    family: socket::AddressFamily,
+    socket_type: socket::Type,
+    protocol: u32,
+    is_nonblock: bool,
+    is_close_on_exec: bool,
+) -> Result<Arc<socket::Inode>, SystemError> {
+    type AF = socket::AddressFamily;
+    let inode = match family {
+        AF::INet => socket::inet::Inet::socket(socket_type, protocol)?,
+        AF::Unix => socket::unix::Unix::socket(socket_type, protocol)?,
+        AF::Netlink => socket::netlink::Netlink::socket(socket_type, protocol)?,
+        // AF_INET6 and every other family are not implemented.
+        _ => return Err(SystemError::EAFNOSUPPORT),
+    };
+    // TODO: apply `is_nonblock` and `is_close_on_exec` to the inode once it supports them.
+    Ok(inode)
+}

+ 166 - 561
kernel/src/net/syscall.rs

@@ -1,9 +1,11 @@
 use core::{cmp::min, ffi::CStr};
 
+use acpi::address;
 use alloc::{boxed::Box, sync::Arc};
+use log::debug;
 use num_traits::{FromPrimitive, ToPrimitive};
 use smoltcp::wire;
-use system_error::SystemError;
+use system_error::SystemError::{self, *};
 
 use crate::{
     filesystem::vfs::{
@@ -13,15 +15,15 @@ use crate::{
     },
     libs::spinlock::SpinLockGuard,
     mm::{verify_area, VirtAddr},
-    net::socket::{AddressFamily, SOL_SOCKET},
+    // net::socket::{netlink::af_netlink::NetlinkSock, AddressFamily},
     process::ProcessManager,
     syscall::Syscall,
 };
 
-use super::{
-    socket::{new_socket, PosixSocketType, Socket, SocketInode},
-    Endpoint, Protocol, ShutdownType,
-};
+use super::socket::{self, Endpoint, Socket};
+use super::socket::{netlink::endpoint, unix::Unix, AddressFamily as AF};
+
+pub use super::syscall_util::*;
 
 /// Flags for socket, socketpair, accept4
 const SOCK_CLOEXEC: FileMode = FileMode::O_CLOEXEC;
@@ -38,18 +40,34 @@ impl Syscall {
         socket_type: usize,
         protocol: usize,
     ) -> Result<usize, SystemError> {
-        let address_family = AddressFamily::try_from(address_family as u16)?;
-        let socket_type = PosixSocketType::try_from((socket_type & 0xf) as u8)?;
-        let protocol = Protocol::from(protocol as u8);
-
-        let socket = new_socket(address_family, socket_type, protocol)?;
-
-        let socketinode: Arc<SocketInode> = SocketInode::new(socket);
-        let f = File::new(socketinode, FileMode::O_RDWR)?;
+        // 打印收到的参数
+        log::debug!(
+            "socket: address_family={:?}, socket_type={:?}, protocol={:?}",
+            address_family,
+            socket_type,
+            protocol
+        );
+        let address_family = socket::AddressFamily::try_from(address_family as u16)?;
+        let type_arg = SysArgSocketType::from_bits_truncate(socket_type as u32);
+        let is_nonblock = type_arg.is_nonblock();
+        let is_close_on_exec = type_arg.is_cloexec();
+        let stype = socket::Type::try_from(type_arg)?;
+        log::debug!("type_arg {:?}  stype {:?}", type_arg, stype);
+
+        let inode = socket::create_socket(
+            address_family,
+            stype,
+            protocol as u32,
+            is_nonblock,
+            is_close_on_exec,
+        )?;
+
+        let file = File::new(inode, FileMode::O_RDWR)?;
         // 把socket添加到当前进程的文件描述符表中
         let binding = ProcessManager::current_pcb().fd_table();
         let mut fd_table_guard = binding.write();
-        let fd = fd_table_guard.alloc_fd(f, None).map(|x| x as usize);
+        let fd: Result<usize, SystemError> =
+            fd_table_guard.alloc_fd(file, None).map(|x| x as usize);
         drop(fd_table_guard);
         return fd;
     }
@@ -67,27 +85,43 @@ impl Syscall {
         protocol: usize,
         fds: &mut [i32],
     ) -> Result<usize, SystemError> {
-        let address_family = AddressFamily::try_from(address_family as u16)?;
-        let socket_type = PosixSocketType::try_from((socket_type & 0xf) as u8)?;
-        let protocol = Protocol::from(protocol as u8);
+        let address_family = AF::try_from(address_family as u16)?;
+        let socket_type = SysArgSocketType::from_bits_truncate(socket_type as u32);
+        let stype = socket::Type::try_from(socket_type)?;
 
         let binding = ProcessManager::current_pcb().fd_table();
         let mut fd_table_guard = binding.write();
 
-        // 创建一对socket
-        let inode0 = SocketInode::new(new_socket(address_family, socket_type, protocol)?);
-        let inode1 = SocketInode::new(new_socket(address_family, socket_type, protocol)?);
-
-        // 进行pair
-        unsafe {
-            inode0
-                .inner_no_preempt()
-                .connect(Endpoint::Inode(Some(inode1.clone())))?;
-            inode1
-                .inner_no_preempt()
-                .connect(Endpoint::Inode(Some(inode0.clone())))?;
+        // check address family, only support AF_UNIX
+        if address_family != AF::Unix {
+            return Err(SystemError::EAFNOSUPPORT);
         }
 
+        // 创建一对socket
+        // let inode0 = socket::create_socket(
+        //     address_family,
+        //     stype,
+        //     protocol as u32,
+        //     socket_type.is_nonblock(),
+        //     socket_type.is_cloexec(),
+        // )?;
+        // let inode1 = socket::create_socket(
+        //     address_family,
+        //     stype,
+        //     protocol as u32,
+        //     socket_type.is_nonblock(),
+        //     socket_type.is_cloexec(),
+        // )?;
+
+        // // 进行pair
+        // unsafe {
+        //     inode0.connect(socket::Endpoint::Inode(inode1.clone()))?;
+        //     inode1.connect(socket::Endpoint::Inode(inode0.clone()))?;
+        // }
+
+        // 创建一对新的unix socket pair
+        let (inode0, inode1) = Unix::new_pairs(stype)?;
+
         fds[0] = fd_table_guard.alloc_fd(File::new(inode0, FileMode::O_RDWR)?, None)?;
         fds[1] = fd_table_guard.alloc_fd(File::new(inode1, FileMode::O_RDWR)?, None)?;
 
@@ -108,12 +142,12 @@ impl Syscall {
         optname: usize,
         optval: &[u8],
     ) -> Result<usize, SystemError> {
-        let socket_inode: Arc<SocketInode> = ProcessManager::current_pcb()
+        let sol = socket::OptionsLevel::try_from(level as u32)?;
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        // 获取内层的socket(真正的数据)
-        let socket: SpinLockGuard<Box<dyn Socket>> = socket_inode.inner();
-        return socket.setsockopt(level, optname, optval).map(|_| 0);
+        debug!("setsockopt: level={:?}", level);
+        return socket.set_option(sol, optname, optval).map(|_| 0);
     }
 
     /// @brief sys_getsockopt系统调用的实际执行函数
@@ -134,33 +168,35 @@ impl Syscall {
     ) -> Result<usize, SystemError> {
         // 获取socket
         let optval = optval as *mut u32;
-        let binding: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
-            .ok_or(SystemError::EBADF)?;
-        let socket = binding.inner();
+            .ok_or(EBADF)?;
 
-        if level as u8 == SOL_SOCKET {
-            let optname = PosixSocketOption::try_from(optname as i32)
-                .map_err(|_| SystemError::ENOPROTOOPT)?;
+        let level = socket::OptionsLevel::try_from(level as u32)?;
+
+        use socket::Options as SO;
+        use socket::OptionsLevel as SOL;
+        if matches!(level, SOL::SOCKET) {
+            let optname = SO::try_from(optname as u32).map_err(|_| ENOPROTOOPT)?;
             match optname {
-                PosixSocketOption::SO_SNDBUF => {
+                SO::SNDBUF => {
                     // 返回发送缓冲区大小
                     unsafe {
-                        *optval = socket.metadata().tx_buf_size as u32;
+                        *optval = socket.send_buffer_size() as u32;
                         *optlen = core::mem::size_of::<u32>() as u32;
                     }
                     return Ok(0);
                 }
-                PosixSocketOption::SO_RCVBUF => {
+                SO::RCVBUF => {
                     // 返回默认的接收缓冲区大小
                     unsafe {
-                        *optval = socket.metadata().rx_buf_size as u32;
+                        *optval = socket.recv_buffer_size() as u32;
                         *optlen = core::mem::size_of::<u32>() as u32;
                     }
                     return Ok(0);
                 }
                 _ => {
-                    return Err(SystemError::ENOPROTOOPT);
+                    return Err(ENOPROTOOPT);
                 }
             }
         }
@@ -172,19 +208,17 @@ impl Syscall {
         // to be interpreted by the TCP protocol, level should be set to the
         // protocol number of TCP.
 
-        let posix_protocol =
-            PosixIpProtocol::try_from(level as u16).map_err(|_| SystemError::ENOPROTOOPT)?;
-        if posix_protocol == PosixIpProtocol::TCP {
-            let optname = PosixTcpSocketOptions::try_from(optname as i32)
-                .map_err(|_| SystemError::ENOPROTOOPT)?;
+        if matches!(level, SOL::TCP) {
+            let optname =
+                PosixTcpSocketOptions::try_from(optname as i32).map_err(|_| ENOPROTOOPT)?;
             match optname {
                 PosixTcpSocketOptions::Congestion => return Ok(0),
                 _ => {
-                    return Err(SystemError::ENOPROTOOPT);
+                    return Err(ENOPROTOOPT);
                 }
             }
         }
-        return Err(SystemError::ENOPROTOOPT);
+        return Err(ENOPROTOOPT);
     }
 
     /// @brief sys_connect系统调用的实际执行函数
@@ -194,12 +228,11 @@ impl Syscall {
     /// @param addrlen 地址长度
     ///
     /// @return 成功返回0,失败返回错误码
-    pub fn connect(fd: usize, addr: *const SockAddr, addrlen: usize) -> Result<usize, SystemError> {
+    pub fn connect(fd: usize, addr: *const SockAddr, addrlen: u32) -> Result<usize, SystemError> {
         let endpoint: Endpoint = SockAddr::to_endpoint(addr, addrlen)?;
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let mut socket = unsafe { socket.inner_no_preempt() };
         socket.connect(endpoint)?;
         Ok(0)
     }
@@ -211,12 +244,19 @@ impl Syscall {
     /// @param addrlen 地址长度
     ///
     /// @return 成功返回0,失败返回错误码
-    pub fn bind(fd: usize, addr: *const SockAddr, addrlen: usize) -> Result<usize, SystemError> {
+    pub fn bind(fd: usize, addr: *const SockAddr, addrlen: u32) -> Result<usize, SystemError> {
+        // 打印收到的参数
+        // log::debug!(
+        //     "bind: fd={:?}, family={:?}, addrlen={:?}",
+        //     fd,
+        //     (unsafe { addr.as_ref().unwrap().family }),
+        //     addrlen
+        // );
         let endpoint: Endpoint = SockAddr::to_endpoint(addr, addrlen)?;
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let mut socket = unsafe { socket.inner_no_preempt() };
+        log::debug!("bind: socket={:?}", socket);
         socket.bind(endpoint)?;
         Ok(0)
     }
@@ -233,9 +273,9 @@ impl Syscall {
     pub fn sendto(
         fd: usize,
         buf: &[u8],
-        _flags: u32,
+        flags: u32,
         addr: *const SockAddr,
-        addrlen: usize,
+        addrlen: u32,
     ) -> Result<usize, SystemError> {
         let endpoint = if addr.is_null() {
             None
@@ -243,11 +283,17 @@ impl Syscall {
             Some(SockAddr::to_endpoint(addr, addrlen)?)
         };
 
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let flags = socket::MessageFlag::from_bits_truncate(flags);
+
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let socket = unsafe { socket.inner_no_preempt() };
-        return socket.write(buf, endpoint);
+
+        if let Some(endpoint) = endpoint {
+            return socket.send_to(buf, endpoint, flags);
+        } else {
+            return socket.send(buf, flags);
+        }
     }
 
     /// @brief sys_recvfrom系统调用的实际执行函数
@@ -262,28 +308,37 @@ impl Syscall {
     pub fn recvfrom(
         fd: usize,
         buf: &mut [u8],
-        _flags: u32,
+        flags: u32,
         addr: *mut SockAddr,
-        addrlen: *mut u32,
+        addr_len: *mut u32,
     ) -> Result<usize, SystemError> {
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let socket = unsafe { socket.inner_no_preempt() };
+        let flags = socket::MessageFlag::from_bits_truncate(flags);
 
-        let (n, endpoint) = socket.read(buf);
-        drop(socket);
+        if addr.is_null() {
+            let (n, _) = socket.recv_from(buf, flags, None)?;
+            return Ok(n);
+        }
 
-        let n: usize = n?;
+        // address is not null
+        let address = unsafe { addr.as_ref() }.ok_or(EINVAL)?;
 
-        // 如果有地址信息,将地址信息写入用户空间
-        if !addr.is_null() {
+        if unsafe { address.is_empty() } {
+            let (recv_len, endpoint) = socket.recv_from(buf, flags, None)?;
             let sockaddr_in = SockAddr::from(endpoint);
             unsafe {
-                sockaddr_in.write_to_user(addr, addrlen)?;
+                sockaddr_in.write_to_user(addr, addr_len)?;
             }
-        }
-        return Ok(n);
+            return Ok(recv_len);
+        } else {
+            // 从socket中读取数据
+            let addr_len = *unsafe { addr_len.as_ref() }.ok_or(EINVAL)?;
+            let address = SockAddr::to_endpoint(addr, addr_len)?;
+            let (recv_len, _) = socket.recv_from(buf, flags, Some(address))?;
+            return Ok(recv_len);
+        };
     }
 
     /// @brief sys_recvmsg系统调用的实际执行函数
@@ -293,30 +348,30 @@ impl Syscall {
     /// @param flags 标志,暂时未使用
     ///
     /// @return 成功返回接收的字节数,失败返回错误码
-    pub fn recvmsg(fd: usize, msg: &mut MsgHdr, _flags: u32) -> Result<usize, SystemError> {
-        // 检查每个缓冲区地址是否合法,生成iovecs
-        let mut iovs = unsafe { IoVecs::from_user(msg.msg_iov, msg.msg_iovlen, true)? };
-
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
-            .get_socket(fd as i32)
-            .ok_or(SystemError::EBADF)?;
-        let socket = unsafe { socket.inner_no_preempt() };
-
-        let mut buf = iovs.new_buf(true);
-        // 从socket中读取数据
-        let (n, endpoint) = socket.read(&mut buf);
-        drop(socket);
-
-        let n: usize = n?;
-
-        // 将数据写入用户空间的iovecs
-        iovs.scatter(&buf[..n]);
-
-        let sockaddr_in = SockAddr::from(endpoint);
-        unsafe {
-            sockaddr_in.write_to_user(msg.msg_name, &mut msg.msg_namelen)?;
-        }
-        return Ok(n);
+    pub fn recvmsg(fd: usize, msg: &mut MsgHdr, flags: u32) -> Result<usize, SystemError> {
+        todo!("recvmsg, fd={}, msg={:?}, flags={}", fd, msg, flags);
+        // // 检查每个缓冲区地址是否合法,生成iovecs
+        // let mut iovs = unsafe { IoVecs::from_user(msg.msg_iov, msg.msg_iovlen, true)? };
+
+        // let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
+        //     .get_socket(fd as i32)
+        //     .ok_or(SystemError::EBADF)?;
+
+        // let flags = socket::MessageFlag::from_bits_truncate(flags as u32);
+
+        // let mut buf = iovs.new_buf(true);
+        // // 从socket中读取数据
+        // let recv_size = socket.recv_msg(&mut buf, flags)?;
+        // drop(socket);
+
+        // // 将数据写入用户空间的iovecs
+        // iovs.scatter(&buf[..recv_size]);
+
+        // // let sockaddr_in = SockAddr::from(endpoint);
+        // // unsafe {
+        // //     sockaddr_in.write_to_user(msg.msg_name, &mut msg.msg_namelen)?;
+        // // }
+        // return Ok(recv_size);
     }
 
     /// @brief sys_listen系统调用的实际执行函数
@@ -326,12 +381,10 @@ impl Syscall {
     ///
     /// @return 成功返回0,失败返回错误码
     pub fn listen(fd: usize, backlog: usize) -> Result<usize, SystemError> {
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let mut socket = unsafe { socket.inner_no_preempt() };
-        socket.listen(backlog)?;
-        return Ok(0);
+        socket.listen(backlog).map(|_| 0)
     }
 
     /// @brief sys_shutdown系统调用的实际执行函数
@@ -341,11 +394,10 @@ impl Syscall {
     ///
     /// @return 成功返回0,失败返回错误码
     pub fn shutdown(fd: usize, how: usize) -> Result<usize, SystemError> {
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let mut socket = unsafe { socket.inner_no_preempt() };
-        socket.shutdown(ShutdownType::from_bits_truncate((how + 1) as u8))?;
+        socket.shutdown(socket::ShutdownTemp::from_how(how))?;
         return Ok(0);
     }
 
@@ -401,18 +453,16 @@ impl Syscall {
         addrlen: *mut u32,
         flags: u32,
     ) -> Result<usize, SystemError> {
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        // debug!("accept: socket={:?}", socket);
-        let mut socket = unsafe { socket.inner_no_preempt() };
+
         // 从socket中接收连接
         let (new_socket, remote_endpoint) = socket.accept()?;
         drop(socket);
 
         // debug!("accept: new_socket={:?}", new_socket);
         // Insert the new socket into the file descriptor vector
-        let new_socket: Arc<SocketInode> = SocketInode::new(new_socket);
 
         let mut file_mode = FileMode::O_RDWR;
         if flags & SOCK_NONBLOCK.bits() != 0 {
@@ -456,12 +506,10 @@ impl Syscall {
         if addr.is_null() {
             return Err(SystemError::EINVAL);
         }
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let endpoint = ProcessManager::current_pcb()
             .get_socket(fd as i32)
-            .ok_or(SystemError::EBADF)?;
-        let socket = socket.inner();
-        let endpoint: Endpoint = socket.endpoint().ok_or(SystemError::EINVAL)?;
-        drop(socket);
+            .ok_or(SystemError::EBADF)?
+            .get_name()?;
 
         let sockaddr_in = SockAddr::from(endpoint);
         unsafe {
@@ -486,11 +534,11 @@ impl Syscall {
             return Err(SystemError::EINVAL);
         }
 
-        let socket: Arc<SocketInode> = ProcessManager::current_pcb()
+        let socket: Arc<socket::Inode> = ProcessManager::current_pcb()
             .get_socket(fd as i32)
             .ok_or(SystemError::EBADF)?;
-        let socket = socket.inner();
-        let endpoint: Endpoint = socket.peer_endpoint().ok_or(SystemError::EINVAL)?;
+
+        let endpoint: Endpoint = socket.get_peer_name()?;
         drop(socket);
 
         let sockaddr_in = SockAddr::from(endpoint);
@@ -501,449 +549,6 @@ impl Syscall {
     }
 }
 
-// 参考资料: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/netinet_in.h.html#tag_13_32
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct SockAddrIn {
-    pub sin_family: u16,
-    pub sin_port: u16,
-    pub sin_addr: u32,
-    pub sin_zero: [u8; 8],
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct SockAddrUn {
-    pub sun_family: u16,
-    pub sun_path: [u8; 108],
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct SockAddrLl {
-    pub sll_family: u16,
-    pub sll_protocol: u16,
-    pub sll_ifindex: u32,
-    pub sll_hatype: u16,
-    pub sll_pkttype: u8,
-    pub sll_halen: u8,
-    pub sll_addr: [u8; 8],
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct SockAddrNl {
-    nl_family: u16,
-    nl_pad: u16,
-    nl_pid: u32,
-    nl_groups: u32,
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct SockAddrPlaceholder {
-    pub family: u16,
-    pub data: [u8; 14],
-}
-
-#[repr(C)]
-#[derive(Clone, Copy)]
-pub union SockAddr {
-    pub family: u16,
-    pub addr_in: SockAddrIn,
-    pub addr_un: SockAddrUn,
-    pub addr_ll: SockAddrLl,
-    pub addr_nl: SockAddrNl,
-    pub addr_ph: SockAddrPlaceholder,
-}
-
-impl SockAddr {
-    /// @brief 把用户传入的SockAddr转换为Endpoint结构体
-    pub fn to_endpoint(addr: *const SockAddr, len: usize) -> Result<Endpoint, SystemError> {
-        verify_area(
-            VirtAddr::new(addr as usize),
-            core::mem::size_of::<SockAddr>(),
-        )
-        .map_err(|_| SystemError::EFAULT)?;
-
-        let addr = unsafe { addr.as_ref() }.ok_or(SystemError::EFAULT)?;
-        unsafe {
-            match AddressFamily::try_from(addr.family)? {
-                AddressFamily::INet => {
-                    if len < addr.len()? {
-                        return Err(SystemError::EINVAL);
-                    }
-
-                    let addr_in: SockAddrIn = addr.addr_in;
-
-                    let ip: wire::IpAddress = wire::IpAddress::from(wire::Ipv4Address::from_bytes(
-                        &u32::from_be(addr_in.sin_addr).to_be_bytes()[..],
-                    ));
-                    let port = u16::from_be(addr_in.sin_port);
-
-                    return Ok(Endpoint::Ip(Some(wire::IpEndpoint::new(ip, port))));
-                }
-                AddressFamily::Unix => {
-                    let addr_un: SockAddrUn = addr.addr_un;
-
-                    let path = CStr::from_bytes_until_nul(&addr_un.sun_path)
-                        .map_err(|_| SystemError::EINVAL)?
-                        .to_str()
-                        .map_err(|_| SystemError::EINVAL)?;
-
-                    let fd = Syscall::open(path.as_ptr(), FileMode::O_RDWR.bits(), 0o755, true)?;
-
-                    let binding = ProcessManager::current_pcb().fd_table();
-                    let fd_table_guard = binding.read();
-
-                    let file = fd_table_guard.get_file_by_fd(fd as i32).unwrap();
-                    if file.file_type() != FileType::Socket {
-                        return Err(SystemError::ENOTSOCK);
-                    }
-                    let inode = file.inode();
-                    let socketinode = inode.as_any_ref().downcast_ref::<Arc<SocketInode>>();
-
-                    return Ok(Endpoint::Inode(socketinode.cloned()));
-                }
-                AddressFamily::Packet => {
-                    // TODO: support packet socket
-                    return Err(SystemError::EINVAL);
-                }
-                AddressFamily::Netlink => {
-                    // TODO: support netlink socket
-                    return Err(SystemError::EINVAL);
-                }
-                _ => {
-                    return Err(SystemError::EINVAL);
-                }
-            }
-        }
-    }
-
-    /// @brief 获取地址长度
-    pub fn len(&self) -> Result<usize, SystemError> {
-        let ret = match AddressFamily::try_from(unsafe { self.family })? {
-            AddressFamily::INet => Ok(core::mem::size_of::<SockAddrIn>()),
-            AddressFamily::Packet => Ok(core::mem::size_of::<SockAddrLl>()),
-            AddressFamily::Netlink => Ok(core::mem::size_of::<SockAddrNl>()),
-            AddressFamily::Unix => Err(SystemError::EINVAL),
-            _ => Err(SystemError::EINVAL),
-        };
-
-        return ret;
-    }
-
-    /// @brief 把SockAddr的数据写入用户空间
-    ///
-    /// @param addr 用户空间的SockAddr的地址
-    /// @param len 要写入的长度
-    ///
-    /// @return 成功返回写入的长度,失败返回错误码
-    pub unsafe fn write_to_user(
-        &self,
-        addr: *mut SockAddr,
-        addr_len: *mut u32,
-    ) -> Result<usize, SystemError> {
-        // 当用户传入的地址或者长度为空时,直接返回0
-        if addr.is_null() || addr_len.is_null() {
-            return Ok(0);
-        }
-
-        // 检查用户传入的地址是否合法
-        verify_area(
-            VirtAddr::new(addr as usize),
-            core::mem::size_of::<SockAddr>(),
-        )
-        .map_err(|_| SystemError::EFAULT)?;
-
-        verify_area(
-            VirtAddr::new(addr_len as usize),
-            core::mem::size_of::<u32>(),
-        )
-        .map_err(|_| SystemError::EFAULT)?;
-
-        let to_write = min(self.len()?, *addr_len as usize);
-        if to_write > 0 {
-            let buf = core::slice::from_raw_parts_mut(addr as *mut u8, to_write);
-            buf.copy_from_slice(core::slice::from_raw_parts(
-                self as *const SockAddr as *const u8,
-                to_write,
-            ));
-        }
-        *addr_len = self.len()? as u32;
-        return Ok(to_write);
-    }
-}
-
-impl From<Endpoint> for SockAddr {
-    fn from(value: Endpoint) -> Self {
-        match value {
-            Endpoint::Ip(ip_endpoint) => {
-                // 未指定地址
-                if ip_endpoint.is_none() {
-                    return SockAddr {
-                        addr_ph: SockAddrPlaceholder {
-                            family: AddressFamily::Unspecified as u16,
-                            data: [0; 14],
-                        },
-                    };
-                }
-                // 指定了地址
-                let ip_endpoint = ip_endpoint.unwrap();
-                match ip_endpoint.addr {
-                    wire::IpAddress::Ipv4(ipv4_addr) => {
-                        let addr_in = SockAddrIn {
-                            sin_family: AddressFamily::INet as u16,
-                            sin_port: ip_endpoint.port.to_be(),
-                            sin_addr: u32::from_be_bytes(ipv4_addr.0).to_be(),
-                            sin_zero: [0; 8],
-                        };
-
-                        return SockAddr { addr_in };
-                    }
-                    _ => {
-                        unimplemented!("not support ipv6");
-                    }
-                }
-            }
-
-            Endpoint::LinkLayer(link_endpoint) => {
-                let addr_ll = SockAddrLl {
-                    sll_family: AddressFamily::Packet as u16,
-                    sll_protocol: 0,
-                    sll_ifindex: link_endpoint.interface as u32,
-                    sll_hatype: 0,
-                    sll_pkttype: 0,
-                    sll_halen: 0,
-                    sll_addr: [0; 8],
-                };
-
-                return SockAddr { addr_ll };
-            }
-
-            _ => {
-                // todo: support other endpoint, like Netlink...
-                unimplemented!("not support {value:?}");
-            }
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Copy)]
-pub struct MsgHdr {
-    /// 指向一个SockAddr结构体的指针
-    pub msg_name: *mut SockAddr,
-    /// SockAddr结构体的大小
-    pub msg_namelen: u32,
-    /// scatter/gather array
-    pub msg_iov: *mut IoVec,
-    /// elements in msg_iov
-    pub msg_iovlen: usize,
-    /// 辅助数据
-    pub msg_control: *mut u8,
-    /// 辅助数据长度
-    pub msg_controllen: usize,
-    /// 接收到的消息的标志
-    pub msg_flags: u32,
-}
-
-#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
-pub enum PosixIpProtocol {
-    /// Dummy protocol for TCP.
-    IP = 0,
-    /// Internet Control Message Protocol.
-    ICMP = 1,
-    /// Internet Group Management Protocol.
-    IGMP = 2,
-    /// IPIP tunnels (older KA9Q tunnels use 94).
-    IPIP = 4,
-    /// Transmission Control Protocol.
-    TCP = 6,
-    /// Exterior Gateway Protocol.
-    EGP = 8,
-    /// PUP protocol.
-    PUP = 12,
-    /// User Datagram Protocol.
-    UDP = 17,
-    /// XNS IDP protocol.
-    IDP = 22,
-    /// SO Transport Protocol Class 4.
-    TP = 29,
-    /// Datagram Congestion Control Protocol.
-    DCCP = 33,
-    /// IPv6-in-IPv4 tunnelling.
-    IPv6 = 41,
-    /// RSVP Protocol.
-    RSVP = 46,
-    /// Generic Routing Encapsulation. (Cisco GRE) (rfc 1701, 1702)
-    GRE = 47,
-    /// Encapsulation Security Payload protocol
-    ESP = 50,
-    /// Authentication Header protocol
-    AH = 51,
-    /// Multicast Transport Protocol.
-    MTP = 92,
-    /// IP option pseudo header for BEET
-    BEETPH = 94,
-    /// Encapsulation Header.
-    ENCAP = 98,
-    /// Protocol Independent Multicast.
-    PIM = 103,
-    /// Compression Header Protocol.
-    COMP = 108,
-    /// Stream Control Transport Protocol
-    SCTP = 132,
-    /// UDP-Lite protocol (RFC 3828)
-    UDPLITE = 136,
-    /// MPLS in IP (RFC 4023)
-    MPLSINIP = 137,
-    /// Ethernet-within-IPv6 Encapsulation
-    ETHERNET = 143,
-    /// Raw IP packets
-    RAW = 255,
-    /// Multipath TCP connection
-    MPTCP = 262,
-}
-
-impl TryFrom<u16> for PosixIpProtocol {
-    type Error = SystemError;
-
-    fn try_from(value: u16) -> Result<Self, Self::Error> {
-        match <Self as FromPrimitive>::from_u16(value) {
-            Some(p) => Ok(p),
-            None => Err(SystemError::EPROTONOSUPPORT),
-        }
-    }
-}
-
-impl From<PosixIpProtocol> for u16 {
-    fn from(value: PosixIpProtocol) -> Self {
-        <PosixIpProtocol as ToPrimitive>::to_u16(&value).unwrap()
-    }
-}
-
-#[allow(non_camel_case_types)]
-#[derive(Debug, Clone, Copy, FromPrimitive, ToPrimitive, PartialEq, Eq)]
-pub enum PosixSocketOption {
-    SO_DEBUG = 1,
-    SO_REUSEADDR = 2,
-    SO_TYPE = 3,
-    SO_ERROR = 4,
-    SO_DONTROUTE = 5,
-    SO_BROADCAST = 6,
-    SO_SNDBUF = 7,
-    SO_RCVBUF = 8,
-    SO_SNDBUFFORCE = 32,
-    SO_RCVBUFFORCE = 33,
-    SO_KEEPALIVE = 9,
-    SO_OOBINLINE = 10,
-    SO_NO_CHECK = 11,
-    SO_PRIORITY = 12,
-    SO_LINGER = 13,
-    SO_BSDCOMPAT = 14,
-    SO_REUSEPORT = 15,
-    SO_PASSCRED = 16,
-    SO_PEERCRED = 17,
-    SO_RCVLOWAT = 18,
-    SO_SNDLOWAT = 19,
-    SO_RCVTIMEO_OLD = 20,
-    SO_SNDTIMEO_OLD = 21,
-
-    SO_SECURITY_AUTHENTICATION = 22,
-    SO_SECURITY_ENCRYPTION_TRANSPORT = 23,
-    SO_SECURITY_ENCRYPTION_NETWORK = 24,
-
-    SO_BINDTODEVICE = 25,
-
-    /// 与SO_GET_FILTER相同
-    SO_ATTACH_FILTER = 26,
-    SO_DETACH_FILTER = 27,
-
-    SO_PEERNAME = 28,
-
-    SO_ACCEPTCONN = 30,
-
-    SO_PEERSEC = 31,
-    SO_PASSSEC = 34,
-
-    SO_MARK = 36,
-
-    SO_PROTOCOL = 38,
-    SO_DOMAIN = 39,
-
-    SO_RXQ_OVFL = 40,
-
-    /// 与SCM_WIFI_STATUS相同
-    SO_WIFI_STATUS = 41,
-    SO_PEEK_OFF = 42,
-
-    /* Instruct lower device to use last 4-bytes of skb data as FCS */
-    SO_NOFCS = 43,
-
-    SO_LOCK_FILTER = 44,
-    SO_SELECT_ERR_QUEUE = 45,
-    SO_BUSY_POLL = 46,
-    SO_MAX_PACING_RATE = 47,
-    SO_BPF_EXTENSIONS = 48,
-    SO_INCOMING_CPU = 49,
-    SO_ATTACH_BPF = 50,
-    // SO_DETACH_BPF = SO_DETACH_FILTER,
-    SO_ATTACH_REUSEPORT_CBPF = 51,
-    SO_ATTACH_REUSEPORT_EBPF = 52,
-
-    SO_CNX_ADVICE = 53,
-    SCM_TIMESTAMPING_OPT_STATS = 54,
-    SO_MEMINFO = 55,
-    SO_INCOMING_NAPI_ID = 56,
-    SO_COOKIE = 57,
-    SCM_TIMESTAMPING_PKTINFO = 58,
-    SO_PEERGROUPS = 59,
-    SO_ZEROCOPY = 60,
-    /// 与SCM_TXTIME相同
-    SO_TXTIME = 61,
-
-    SO_BINDTOIFINDEX = 62,
-
-    SO_TIMESTAMP_OLD = 29,
-    SO_TIMESTAMPNS_OLD = 35,
-    SO_TIMESTAMPING_OLD = 37,
-    SO_TIMESTAMP_NEW = 63,
-    SO_TIMESTAMPNS_NEW = 64,
-    SO_TIMESTAMPING_NEW = 65,
-
-    SO_RCVTIMEO_NEW = 66,
-    SO_SNDTIMEO_NEW = 67,
-
-    SO_DETACH_REUSEPORT_BPF = 68,
-
-    SO_PREFER_BUSY_POLL = 69,
-    SO_BUSY_POLL_BUDGET = 70,
-
-    SO_NETNS_COOKIE = 71,
-    SO_BUF_LOCK = 72,
-    SO_RESERVE_MEM = 73,
-    SO_TXREHASH = 74,
-    SO_RCVMARK = 75,
-}
-
-impl TryFrom<i32> for PosixSocketOption {
-    type Error = SystemError;
-
-    fn try_from(value: i32) -> Result<Self, Self::Error> {
-        match <Self as FromPrimitive>::from_i32(value) {
-            Some(p) => Ok(p),
-            None => Err(SystemError::EINVAL),
-        }
-    }
-}
-
-impl From<PosixSocketOption> for i32 {
-    fn from(value: PosixSocketOption) -> Self {
-        <PosixSocketOption as ToPrimitive>::to_i32(&value).unwrap()
-    }
-}
-
 #[derive(Debug, Clone, Copy, PartialEq, Eq, FromPrimitive, ToPrimitive)]
 pub enum PosixTcpSocketOptions {
     /// Turn off Nagle's algorithm.

+ 347 - 0
kernel/src/net/syscall_util.rs

@@ -0,0 +1,347 @@
+bitflags::bitflags! {
+    /// Packed socket-type word: the low four bits carry the socket type (see
+    /// `types()`), the remaining bits carry the `NONBLOCK` / `CLOEXEC` modifiers,
+    /// which reuse the bit values of the corresponding `FileMode` flags.
+    ///
+    /// Presumably this mirrors the `type` argument of the `socket` system call —
+    /// TODO confirm against the caller.
+    // #[derive(PartialEq, Eq, Debug, Clone, Copy)]
+    pub struct SysArgSocketType: u32 {
+        // NOTE(review): Linux on x86_64 defines SOCK_STREAM = 1 and SOCK_DGRAM = 2;
+        // DGRAM and STREAM look swapped here — confirm before relying on these names.
+        const DGRAM     = 1;    // 0b0000_0001
+        const STREAM    = 2;    // 0b0000_0010
+        const RAW       = 3;    // 0b0000_0011
+        const RDM       = 4;    // 0b0000_0100
+        const SEQPACKET = 5;    // 0b0000_0101
+        const DCCP      = 6;    // 0b0000_0110
+        const PACKET    = 10;   // 0b0000_1010
+
+        // Modifier flags, taken verbatim from the file-mode bit definitions.
+        const NONBLOCK  = crate::filesystem::vfs::file::FileMode::O_NONBLOCK.bits();
+        const CLOEXEC   = crate::filesystem::vfs::file::FileMode::O_CLOEXEC.bits();
+    }
+}
+
+impl SysArgSocketType {
+    /// Returns only the socket-type part of the word (the low four bits), with the
+    /// `NONBLOCK` / `CLOEXEC` modifiers stripped.
+    ///
+    /// Uses `from_bits_truncate` rather than `from_bits(..).unwrap()`: every one of
+    /// the low four bits is covered by a declared constant, so the result is the
+    /// same, but this form cannot panic if the constant set changes later.
+    #[inline(always)]
+    pub fn types(&self) -> SysArgSocketType {
+        SysArgSocketType::from_bits_truncate(self.bits() & 0b_1111)
+    }
+
+    /// Returns `true` when the `NONBLOCK` modifier is set.
+    #[inline(always)]
+    pub fn is_nonblock(&self) -> bool {
+        self.contains(SysArgSocketType::NONBLOCK)
+    }
+
+    /// Returns `true` when the `CLOEXEC` modifier is set.
+    #[inline(always)]
+    pub fn is_cloexec(&self) -> bool {
+        self.contains(SysArgSocketType::CLOEXEC)
+    }
+}
+
+use alloc::sync::Arc;
+use core::ffi::CStr;
+use unix::INODE_MAP;
+
+use crate::{
+    filesystem::vfs::{
+        file::FileMode, FileType, IndexNode, MAX_PATHLEN, ROOT_INODE, VFS_MAX_FOLLOW_SYMLINK_TIMES,
+    },
+    libs::casting::DowncastArc,
+    mm::{verify_area, VirtAddr},
+    net::socket::{self, *},
+    process::ProcessManager,
+    syscall::Syscall,
+};
+use smoltcp;
+use system_error::SystemError::{self, *};
+
+// Reference: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/netinet_in.h.html#tag_13_32
+/// IPv4 socket address in the C layout exchanged with user space.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct SockAddrIn {
+    /// Address family tag; `AddressFamily::INet` when built by the kernel.
+    pub sin_family: u16,
+    /// Port number, stored big-endian (network byte order).
+    pub sin_port: u16,
+    /// IPv4 address, stored big-endian (network byte order).
+    pub sin_addr: u32,
+    /// Padding; zero-filled when the kernel builds the address.
+    pub sin_zero: [u8; 8],
+}
+
+/// Unix-domain socket address in the C layout exchanged with user space.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct SockAddrUn {
+    /// Address family tag; `AddressFamily::Unix` when built by the kernel.
+    pub sun_family: u16,
+    /// Socket path bytes; parsed up to the first NUL byte when read from user space.
+    pub sun_path: [u8; 108],
+}
+
+/// Link-layer (packet) socket address in the C layout exchanged with user space.
+///
+/// When the kernel builds one from an endpoint, only `sll_family` and
+/// `sll_ifindex` are filled in; every other field is set to zero.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct SockAddrLl {
+    /// Address family tag; `AddressFamily::Packet` when built by the kernel.
+    pub sll_family: u16,
+    pub sll_protocol: u16,
+    /// Interface index, taken from the link-layer endpoint's `interface`.
+    pub sll_ifindex: u32,
+    pub sll_hatype: u16,
+    pub sll_pkttype: u8,
+    pub sll_halen: u8,
+    pub sll_addr: [u8; 8],
+}
+
+/// Netlink socket address in the C layout exchanged with user space.
+///
+/// NOTE(review): `nl_family` is a typed enum rather than a raw `u16`. Values of
+/// this struct are copied out of user-supplied memory, so this relies on the
+/// family tag having been validated first and on `AddressFamily` having the same
+/// size as the `u16` tag used by the other address structs — confirm.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct SockAddrNl {
+    /// Address family tag; `AddressFamily::Netlink` when built by the kernel.
+    pub nl_family: AddressFamily,
+    /// Padding; zero when built by the kernel.
+    pub nl_pad: u16,
+    pub nl_pid: u32,
+    pub nl_groups: u32,
+}
+
+/// Family-agnostic view of a socket address: the family tag followed by 14
+/// opaque payload bytes.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct SockAddrPlaceholder {
+    /// Address family tag.
+    pub family: u16,
+    /// Raw, uninterpreted address payload.
+    pub data: [u8; 14],
+}
+
+/// Socket address as passed across the system-call boundary.
+///
+/// This is a C-layout union: `family` overlays the leading family tag of every
+/// variant and selects which of the typed views is meaningful. Reading any field
+/// requires `unsafe`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub union SockAddr {
+    /// Address family tag shared by all variants.
+    pub family: u16,
+    /// IPv4 view.
+    pub addr_in: SockAddrIn,
+    /// Unix-domain view.
+    pub addr_un: SockAddrUn,
+    /// Link-layer (packet) view.
+    pub addr_ll: SockAddrLl,
+    /// Netlink view.
+    pub addr_nl: SockAddrNl,
+    /// Family-agnostic placeholder view.
+    pub addr_ph: SockAddrPlaceholder,
+}
+
+impl SockAddr {
+    /// Converts a user-supplied `SockAddr` into an `Endpoint`.
+    ///
+    /// The family tag selects the conversion:
+    /// - `INet`: decodes the big-endian IPv4 address and port into `Endpoint::Ip`.
+    /// - `Unix`: resolves the NUL-terminated path relative to the current working
+    ///   directory; if the lookup fails, a regular file is created at that path.
+    ///   Returns `Endpoint::Unixpath` carrying the inode id and the resolved path.
+    /// - `Netlink`: wraps the netlink address in `Endpoint::Netlink`.
+    /// - `Packet` and every other family: rejected with `EINVAL`.
+    ///
+    /// # Errors
+    /// - `EFAULT` if `addr` is null.
+    /// - `EINVAL` if `len` is shorter than an IPv4 address (checked for `INet`
+    ///   only), if the unix path is not NUL-terminated or not valid UTF-8, or if
+    ///   the family is unsupported.
+    /// - Any error propagated from the family conversion, path resolution or file
+    ///   creation.
+    ///
+    /// NOTE(review): `addr` is dereferenced without a `verify_area` check here —
+    /// presumably every caller validates the user pointer first; confirm.
+    /// NOTE(review): the `Unix` and `Netlink` branches do not compare `len` with
+    /// the size they read — confirm this is intended.
+    pub fn to_endpoint(addr: *const SockAddr, len: u32) -> Result<Endpoint, SystemError> {
+        use crate::net::socket::AddressFamily;
+
+        let addr = unsafe { addr.as_ref() }.ok_or(SystemError::EFAULT)?;
+
+        unsafe {
+            match AddressFamily::try_from(addr.family)? {
+                AddressFamily::INet => {
+                    if len < addr.len()? {
+                        log::error!("len < addr.len()");
+                        return Err(SystemError::EINVAL);
+                    }
+
+                    let addr_in: SockAddrIn = addr.addr_in;
+
+                    use smoltcp::wire;
+                    // Both fields arrive in network byte order; rebuild the four
+                    // address bytes and convert the port to host order.
+                    let ip: wire::IpAddress = wire::IpAddress::from(wire::Ipv4Address::from_bytes(
+                        &u32::from_be(addr_in.sin_addr).to_be_bytes()[..],
+                    ));
+                    let port = u16::from_be(addr_in.sin_port);
+
+                    return Ok(Endpoint::Ip(wire::IpEndpoint::new(ip, port)));
+                }
+                AddressFamily::Unix => {
+                    let addr_un: SockAddrUn = addr.addr_un;
+
+                    // The path ends at the first NUL byte and must be valid UTF-8.
+                    let path = CStr::from_bytes_until_nul(&addr_un.sun_path)
+                        .map_err(|_| {
+                            log::error!("CStr::from_bytes_until_nul fail");
+                            SystemError::EINVAL
+                        })?
+                        .to_str()
+                        .map_err(|_| {
+                            log::error!("CStr::to_str fail");
+                            SystemError::EINVAL
+                        })?;
+
+                    // Resolve the (whitespace-trimmed) path against the current
+                    // working directory of the calling process.
+                    let (inode_begin, path) = crate::filesystem::vfs::utils::user_path_at(
+                        &ProcessManager::current_pcb(),
+                        crate::filesystem::vfs::fcntl::AtFlags::AT_FDCWD.bits(),
+                        path.trim(),
+                    )?;
+                    let inode0: Result<Arc<dyn IndexNode>, SystemError> =
+                        inode_begin.lookup_follow_symlink(&path, VFS_MAX_FOLLOW_SYMLINK_TIMES);
+
+                    // Any lookup failure falls through to creating the file.
+                    let inode = match inode0 {
+                        Ok(inode) => inode,
+                        Err(_) => {
+                            let (filename, parent_path) =
+                                crate::filesystem::vfs::utils::rsplit_path(&path);
+                            // Look up the parent directory
+                            log::debug!("filename {:?} parent_path {:?}", filename, parent_path);
+
+                            let parent_inode: Arc<dyn IndexNode> =
+                                ROOT_INODE().lookup(parent_path.unwrap_or("/"))?;
+                            // Create the file
+                            let inode: Arc<dyn IndexNode> = match parent_inode.create(
+                                filename,
+                                FileType::File,
+                                crate::filesystem::vfs::syscall::ModeType::from_bits_truncate(
+                                    0o755,
+                                ),
+                            ) {
+                                Ok(inode) => inode,
+                                Err(e) => {
+                                    log::debug!("inode create fail {:?}", e);
+                                    return Err(e);
+                                }
+                            };
+                            inode
+                        }
+                    };
+
+                    return Ok(Endpoint::Unixpath((inode.metadata()?.inode_id, path)));
+                }
+                AddressFamily::Packet => {
+                    // TODO: support packet socket
+                    log::warn!("not support address family {:?}", addr.family);
+                    return Err(SystemError::EINVAL);
+                }
+                AddressFamily::Netlink => {
+                    // The netlink address is wrapped as-is, without further checks.
+                    let addr: SockAddrNl = addr.addr_nl;
+                    return Ok(Endpoint::Netlink(NetlinkEndpoint::new(addr)));
+                }
+                _ => {
+                    log::warn!("not support address family {:?}", addr.family);
+                    return Err(SystemError::EINVAL);
+                }
+            }
+        }
+    }
+
+    /// Returns the size in bytes of the concrete address struct selected by the
+    /// family tag.
+    ///
+    /// # Errors
+    /// Propagates the error of the family conversion, and returns `EINVAL` for any
+    /// family other than `INet`, `Packet`, `Netlink` and `Unix`.
+    pub fn len(&self) -> Result<u32, SystemError> {
+        let family = AddressFamily::try_from(unsafe { self.family })?;
+        let size = match family {
+            AddressFamily::INet => core::mem::size_of::<SockAddrIn>(),
+            AddressFamily::Packet => core::mem::size_of::<SockAddrLl>(),
+            AddressFamily::Netlink => core::mem::size_of::<SockAddrNl>(),
+            AddressFamily::Unix => core::mem::size_of::<SockAddrUn>(),
+            _ => return Err(SystemError::EINVAL),
+        };
+        Ok(size as u32)
+    }
+
+    /// Copies this address into a user-space buffer.
+    ///
+    /// At most `*addr_len` bytes are copied to `addr`. Afterwards `*addr_len` is
+    /// overwritten with the full (untruncated) length of this address.
+    ///
+    /// Returns the number of bytes copied. If either pointer is null, nothing is
+    /// written and `Ok(0)` is returned.
+    ///
+    /// # Errors
+    /// `EFAULT` if either pointer fails `verify_area`; any error from `len()`.
+    ///
+    /// # Safety
+    /// `addr` and `addr_len` are dereferenced as raw pointers; the caller must make
+    /// sure they are usable for the accesses performed here.
+    pub unsafe fn write_to_user(
+        &self,
+        addr: *mut SockAddr,
+        addr_len: *mut u32,
+    ) -> Result<u32, SystemError> {
+        // Nothing to report into: succeed with zero bytes written.
+        if addr.is_null() || addr_len.is_null() {
+            return Ok(0);
+        }
+
+        // Both user pointers must pass the address-range check.
+        let addr_start = VirtAddr::new(addr as usize);
+        verify_area(addr_start, core::mem::size_of::<SockAddr>())
+            .map_err(|_| SystemError::EFAULT)?;
+
+        let len_start = VirtAddr::new(addr_len as usize);
+        verify_area(len_start, core::mem::size_of::<u32>()).map_err(|_| SystemError::EFAULT)?;
+
+        let full_len = self.len()?;
+        let to_write = full_len.min(*addr_len);
+        if to_write > 0 {
+            let count = to_write as usize;
+            let dst = core::slice::from_raw_parts_mut(addr as *mut u8, count);
+            let src = core::slice::from_raw_parts(self as *const SockAddr as *const u8, count);
+            dst.copy_from_slice(src);
+        }
+        // Always report the full length so the caller can detect truncation.
+        *addr_len = full_len;
+        Ok(to_write)
+    }
+
+    /// Reports whether the address is all-zero when viewed as a placeholder: the
+    /// family tag is 0 and the 14 payload bytes are 0.
+    ///
+    /// # Safety
+    /// Reads the union through its `family` and `addr_ph` views.
+    pub unsafe fn is_empty(&self) -> bool {
+        unsafe {
+            let family_is_zero = self.family == 0;
+            family_is_zero && self.addr_ph.data == [0u8; 14]
+        }
+    }
+}
+
+impl From<Endpoint> for SockAddr {
+    /// Builds the user-visible socket address for an endpoint.
+    ///
+    /// - `Endpoint::Ip` with an IPv4 address becomes a `SockAddrIn` with port and
+    ///   address stored big-endian.
+    /// - `Endpoint::LinkLayer` becomes a `SockAddrLl` carrying only the interface
+    ///   index.
+    /// - `Endpoint::Netlink` becomes a `SockAddrNl` carrying the pid and groups.
+    /// - `Endpoint::Inode` becomes a `SockAddrUn` holding the path bytes. A path
+    ///   longer than `sun_path` is truncated with a warning; it no longer panics
+    ///   the kernel.
+    ///
+    /// # Panics
+    /// Panics (`unimplemented!`) for IPv6 addresses and for any other endpoint
+    /// kind.
+    fn from(value: Endpoint) -> Self {
+        match value {
+            Endpoint::Ip(ip_endpoint) => match ip_endpoint.addr {
+                smoltcp::wire::IpAddress::Ipv4(ipv4_addr) => SockAddr {
+                    addr_in: SockAddrIn {
+                        sin_family: AddressFamily::INet as u16,
+                        sin_port: ip_endpoint.port.to_be(),
+                        sin_addr: u32::from_be_bytes(ipv4_addr.0).to_be(),
+                        sin_zero: [0; 8],
+                    },
+                },
+                _ => {
+                    unimplemented!("not support ipv6");
+                }
+            },
+
+            Endpoint::LinkLayer(link_endpoint) => SockAddr {
+                addr_ll: SockAddrLl {
+                    sll_family: AddressFamily::Packet as u16,
+                    sll_protocol: 0,
+                    sll_ifindex: link_endpoint.interface as u32,
+                    sll_hatype: 0,
+                    sll_pkttype: 0,
+                    sll_halen: 0,
+                    sll_addr: [0; 8],
+                },
+            },
+
+            Endpoint::Netlink(netlink_endpoint) => SockAddr {
+                addr_nl: SockAddrNl {
+                    nl_family: AddressFamily::Netlink,
+                    nl_pad: 0,
+                    nl_pid: netlink_endpoint.addr.nl_pid,
+                    nl_groups: netlink_endpoint.addr.nl_groups,
+                },
+            },
+
+            Endpoint::Inode((_, path)) => {
+                log::debug!("from unix path {:?}", path);
+                let bytes = path.as_bytes();
+                let mut sun_path = [0u8; 108];
+                // A resolved path may exceed the fixed-size buffer. Truncate it
+                // rather than panicking: this conversion is reachable from system
+                // calls and must not bring the kernel down.
+                let copy_len = bytes.len().min(sun_path.len());
+                if copy_len < bytes.len() {
+                    log::warn!(
+                        "unix address path too long ({} bytes), truncated to {} bytes",
+                        bytes.len(),
+                        copy_len
+                    );
+                }
+                sun_path[..copy_len].copy_from_slice(&bytes[..copy_len]);
+                SockAddr {
+                    addr_un: SockAddrUn {
+                        sun_family: AddressFamily::Unix as u16,
+                        sun_path,
+                    },
+                }
+            }
+
+            _ => {
+                // todo: support the remaining endpoint kinds
+                unimplemented!("not support {value:?}");
+            }
+        }
+    }
+}
+
+/// Message header used by the `sendmsg` / `recvmsg` system calls, in C layout.
+///
+/// NOTE(review): Linux's x86_64 `struct msghdr` uses a pointer-sized
+/// `msg_controllen`, which places `msg_flags` at byte offset 48. With the `u32`
+/// used here, `msg_flags` immediately follows at offset 44 — confirm the layout
+/// against the user-space ABI.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct MsgHdr {
+    /// Pointer to a `SockAddr` structure
+    pub msg_name: *mut SockAddr,
+    /// Size of the `SockAddr` structure
+    pub msg_namelen: u32,
+    /// scatter/gather array
+    pub msg_iov: *mut crate::filesystem::vfs::syscall::IoVec,
+    /// elements in msg_iov
+    pub msg_iovlen: usize,
+    /// Ancillary data
+    pub msg_control: *mut u8,
+    /// Length of the ancillary data
+    pub msg_controllen: u32,
+    /// Flags on the received message
+    pub msg_flags: u32,
+}
+
+// TODO: read a MsgHdr from user space, and write a MsgHdr back to user space

+ 2 - 1
kernel/src/process/mod.rs

@@ -50,7 +50,8 @@ use crate::{
         ucontext::AddressSpace,
         VirtAddr,
     },
-    net::socket::SocketInode,
+    net::socket::Inode as SocketInode,
+    // net::socket::SocketInode,
     sched::completion::Completion,
     sched::{
         cpu_rq, fair::FairSchedEntity, prio::MAX_PRIO, DequeueFlag, EnqueueFlag, OnRq, SchedMode,

+ 8 - 8
kernel/src/syscall/mod.rs

@@ -456,9 +456,10 @@ impl Syscall {
                     // 地址空间超出了用户空间的范围,不合法
                     Err(SystemError::EFAULT)
                 } else {
-                    Self::connect(args[0], addr, addrlen)
+                    Self::connect(args[0], addr, addrlen as u32)
                 }
             }
+
             SYS_BIND => {
                 let addr = args[1] as *const SockAddr;
                 let addrlen = args[2];
@@ -468,7 +469,7 @@ impl Syscall {
                     // 地址空间超出了用户空间的范围,不合法
                     Err(SystemError::EFAULT)
                 } else {
-                    Self::bind(args[0], addr, addrlen)
+                    Self::bind(args[0], addr, addrlen as u32)
                 }
             }
 
@@ -486,7 +487,7 @@ impl Syscall {
                     Err(SystemError::EFAULT)
                 } else {
                     let data: &[u8] = unsafe { core::slice::from_raw_parts(buf, len) };
-                    Self::sendto(args[0], data, flags, addr, addrlen)
+                    Self::sendto(args[0], data, flags, addr, addrlen as u32)
                 }
             }
 
@@ -495,7 +496,7 @@ impl Syscall {
                 let len = args[2];
                 let flags = args[3] as u32;
                 let addr = args[4] as *mut SockAddr;
-                let addrlen = args[5] as *mut usize;
+                let addrlen = args[5] as *mut u32;
                 let virt_buf = VirtAddr::new(buf as usize);
                 let virt_addrlen = VirtAddr::new(addrlen as usize);
                 let virt_addr = VirtAddr::new(addr as usize);
@@ -507,7 +508,7 @@ impl Syscall {
                     }
 
                     // 验证addrlen的地址是否合法
-                    if verify_area(virt_addrlen, core::mem::size_of::<u32>()).is_err() {
+                    if verify_area(virt_addrlen, core::mem::size_of::<usize>()).is_err() {
                         // 地址空间超出了用户空间的范围,不合法
                         return Err(SystemError::EFAULT);
                     }
@@ -518,12 +519,11 @@ impl Syscall {
                     }
                     return Ok(());
                 };
-                let r = security_check();
-                if let Err(e) = r {
+                if let Err(e) = security_check() {
                     Err(e)
                 } else {
                     let buf = unsafe { core::slice::from_raw_parts_mut(buf, len) };
-                    Self::recvfrom(args[0], buf, flags, addr, addrlen as *mut u32)
+                    Self::recvfrom(args[0], buf, flags, addr, addrlen)
                 }
             }
 

+ 1 - 1
tools/run-qemu.sh

@@ -143,7 +143,7 @@ while true;do
 # ps: 下面这条使用tap的方式,无法dhcp获取到ip,暂时不知道为什么
 # QEMU_DEVICES="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 -net nic,netdev=nic0 -netdev tap,id=nic0,model=virtio-net-pci,script=qemu/ifup-nat,downscript=qemu/ifdown-nat -usb -device qemu-xhci,id=xhci,p2=8,p3=4 "
 QEMU_DEVICES+="${QEMU_DEVICES_DISK} "
-QEMU_DEVICES+=" -netdev user,id=hostnet0,hostfwd=tcp::12580-:12580 -device virtio-net-pci,vectors=5,netdev=hostnet0,id=net0 -usb -device qemu-xhci,id=xhci,p2=8,p3=4 " 
+QEMU_DEVICES+=" -netdev user,id=hostnet0,hostfwd=tcp::12580-:12580,hostfwd=udp::12549-:12549 -device virtio-net-pci,vectors=5,netdev=hostnet0,id=net0 -usb -device qemu-xhci,id=xhci,p2=8,p3=4 " 
 # E1000E
 # QEMU_DEVICES="-device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0 -netdev user,id=hostnet0,hostfwd=tcp::12580-:12580 -net nic,model=e1000e,netdev=hostnet0,id=net0 -netdev user,id=hostnet1,hostfwd=tcp::12581-:12581 -device virtio-net-pci,vectors=5,netdev=hostnet1,id=net1 -usb -device qemu-xhci,id=xhci,p2=8,p3=4 " 
 QEMU_ARGUMENT+="-d ${QEMU_DISK_IMAGE} -m ${QEMU_MEMORY} -smp ${QEMU_SMP} -boot order=d ${QEMU_MONITOR} -d ${qemu_trace_std} "

+ 3 - 0
user/apps/ping/.gitignore

@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+/install/

+ 18 - 0
user/apps/ping/Cargo.toml

@@ -0,0 +1,18 @@
+[package]
+name = "ping"
+version = "0.1.0"
+edition = "2021"
+description = "ping for dragonOS"
+authors = [ "smallc <[email protected]>" ]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow = "1.0.86"
+clap = { version = "4.5.11", features = ["derive"] }
+crossbeam-channel = "0.5.13"
+pnet = "0.35.0"
+rand = "0.8.5"
+signal-hook = "0.3.17"
+socket2 = "0.5.7"
+thiserror = "1.0.63"

+ 56 - 0
user/apps/ping/Makefile

@@ -0,0 +1,56 @@
+TOOLCHAIN=
+RUSTFLAGS=
+
+ifdef DADK_CURRENT_BUILD_DIR
+# 如果是在dadk中编译,那么安装到dadk的安装目录中
+	INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR)
+else
+# 如果是在本地编译,那么安装到当前目录下的install目录中
+	INSTALL_DIR = ./install
+endif
+
+ifeq ($(ARCH), x86_64)
+	export RUST_TARGET=x86_64-unknown-linux-musl
+else ifeq ($(ARCH), riscv64)
+	export RUST_TARGET=riscv64gc-unknown-linux-gnu
+else 
+# 默认为x86_64,用于本地编译
+	export RUST_TARGET=x86_64-unknown-linux-musl
+endif
+
+run:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)
+
+build:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)
+
+clean:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)
+
+test:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET)
+
+doc:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET)
+
+fmt:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt
+
+fmt-check:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check
+
+run-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release
+
+build-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release
+
+clean-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release
+
+test-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release
+
+.PHONY: install
+install:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force

+ 23 - 0
user/apps/ping/README.md

@@ -0,0 +1,23 @@
+# PING
+为DragonOS实现ping
+## NAME
+ping - 向网络主机发送ICMP ECHO_REQUEST
+## SYNOPSIS
+[-c count]: 指定 ping 的次数。例如,`-c 4` 会向目标主机发送 4 个 ping 请求。
+
+[-i interval]:指定两次 ping 请求之间的时间间隔,单位是毫秒(默认 1000)。例如,`-i 2000` 会每 2 秒发送一次 ping 请求。
+
+[-w timeout]: 指定等待 ping 响应的超时时间,单位是秒。例如,`-w 5` 会在 5 秒后超时。
+
+[-s packetsize]:指定发送的 ICMP Packet 的大小,单位是字节。例如,`-s 64` 会发送大小为 64 字节的 ICMP Packet。
+
+[-t ttl]:指定 ping 的 TTL (Time to Live)。例如,`-t 64` 会设置 TTL 为 64。
+
+{destination}:指定要 ping 的目标主机。可以是 IP 地址或者主机名。例如,`192.168.1.1` 或 `www.example.com`。
+
+## DESCRIPTION
+ping 使用 ICMP 协议的必需的 ECHO_REQUEST 数据报来引发主机或网关的 ICMP ECHO_RESPONSE。ECHO_REQUEST 数据报(“ping”)具有 IP 和 ICMP 头,后面跟着一个 struct timeval,然后是用于填充数据包的任意数量的“填充”字节。
+
+ping 支持 IPv4 和 IPv6。可以通过指定 -4 或 -6 来强制只使用其中一个。
+
+ping 还可以发送 IPv6 节点信息查询(RFC4620)。可能不允许中间跳跃,因为 IPv6 源路由已被弃用(RFC5095)。

+ 50 - 0
user/apps/ping/src/args.rs

@@ -0,0 +1,50 @@
+use clap::{arg, command, Parser};
+use rand::random;
+
+use crate::config::{Config, IpAddress};
+
+/// Command-line arguments of the ping utility.
+///
+/// Parsed with clap; [`Args::as_config`] turns them into the runtime configuration.
+/// The per-field doc comments below are what clap shows as option help text.
+#[derive(Parser, Debug, Clone)]
+#[command(author, version, about, long_about = None)]
+pub struct Args {
+    /// Count of ping times
+    #[arg(short, default_value_t = 4)]
+    count: u16,
+
+    /// Ping packet size
+    #[arg(short = 's', default_value_t = 64)]
+    packet_size: usize,
+
+    /// Ping ttl
+    #[arg(short = 't', default_value_t = 64)]
+    ttl: u32,
+
+    /// Ping timeout seconds
+    #[arg(short = 'w', default_value_t = 1)]
+    timeout: u64,
+
+    /// Ping interval duration milliseconds
+    #[arg(short = 'i', default_value_t = 1000)]
+    interval: u64,
+
+    /// Ping destination, ip or domain
+    #[arg(value_parser=IpAddress::parse)]
+    destination: IpAddress,
+}
+
+impl Args {
+    /// Builds the runtime [`Config`] from the parsed arguments.
+    ///
+    /// Every option is copied over as-is; in addition a random ICMP identifier
+    /// is generated and the starting sequence number is set to 1.
+    pub fn as_config(&self) -> Config {
+        let Self {
+            count,
+            packet_size,
+            ttl,
+            timeout,
+            interval,
+            destination,
+        } = self;
+        let id: u16 = random();
+
+        Config {
+            count: *count,
+            packet_size: *packet_size,
+            ttl: *ttl,
+            timeout: *timeout,
+            interval: *interval,
+            id,
+            sequence: 1,
+            address: destination.clone(),
+        }
+    }
+}

+ 45 - 0
user/apps/ping/src/config.rs

@@ -0,0 +1,45 @@
+use anyhow::bail;
+use std::{
+    ffi::CString,
+    net::{self},
+};
+
+use crate::error;
+
+/// Runtime configuration of a ping session.
+///
+/// Holds the parameter values of one invocation of the ping command.
+#[derive(Debug, Clone)]
+pub struct Config {
+    // Number of echo requests to send.
+    pub count: u16,
+    // Size in bytes of the buffer used for the ICMP packet.
+    pub packet_size: usize,
+    // TTL value taken from the command line.
+    pub ttl: u32,
+    // Timeout in seconds, as described by the command-line option.
+    pub timeout: u64,
+    // Delay between two consecutive requests, in milliseconds.
+    pub interval: u64,
+    // ICMP identifier placed in every echo request.
+    pub id: u16,
+    // Base sequence number; the per-request offset is added to it.
+    pub sequence: u16,
+    // Destination of the echo requests.
+    pub address: IpAddress,
+}
+
+/// Destination address of a ping session.
+///
+/// `ip` is the parsed address handed to the socket,
+/// `raw` is the original user input kept for printing.
+#[derive(Debug, Clone)]
+pub struct IpAddress {
+    pub ip: net::IpAddr,
+    pub raw: String,
+}
+
+impl IpAddress {
+    /// Parses `host` as a literal IP address.
+    ///
+    /// On success the parsed address is returned together with the original text.
+    ///
+    /// # Errors
+    /// Fails with [`error::PingError::InvalidConfig`] when `host` is not a valid
+    /// IP address literal.
+    pub fn parse(host: &str) -> anyhow::Result<Self> {
+        let Ok(ip) = host.parse::<net::IpAddr>() else {
+            bail!(error::PingError::InvalidConfig(
+                "Invalid Address".to_string()
+            ));
+        };
+
+        Ok(Self {
+            ip,
+            raw: host.to_owned(),
+        })
+    }
+}

+ 10 - 0
user/apps/ping/src/error.rs

@@ -0,0 +1,10 @@
+#![allow(dead_code)]
+
+/// Errors reported by the ping utility.
+#[derive(Debug, Clone, thiserror::Error)]
+pub enum PingError {
+    /// The supplied configuration cannot be used; the payload explains why.
+    #[error("invalid config: {0}")]
+    InvalidConfig(String),
+
+    /// A buffer could not be interpreted as the expected ICMP packet.
+    #[error("invalid packet")]
+    InvalidPacket,
+}

+ 23 - 0
user/apps/ping/src/main.rs

@@ -0,0 +1,23 @@
+use args::Args;
+use clap::Parser;
+use std::format;
+
+mod args;
+mod config;
+mod error;
+mod ping;
+/// Entry point of the ping utility.
+///
+/// Parses the command line, creates the pinger and runs it; any failure is
+/// reported through [`exit`].
+fn main() {
+    let config = Args::parse().as_config();
+
+    let outcome = ping::Ping::new(config)
+        .map_err(|e| format!("Error on init: {}", e))
+        .and_then(|pinger| {
+            pinger
+                .run()
+                .map_err(|e| format!("Error on run ping: {}", e))
+        });
+
+    if let Err(msg) = outcome {
+        exit(msg);
+    }
+}
+
+fn exit(msg: String) {
+    eprintln!("{}", msg);
+    std::process::exit(1);
+}

+ 151 - 0
user/apps/ping/src/ping.rs

@@ -0,0 +1,151 @@
+use crossbeam_channel::{bounded, select, Receiver};
+use pnet::packet::{
+    icmp::{
+        echo_reply::{EchoReplyPacket, IcmpCodes},
+        echo_request::MutableEchoRequestPacket,
+        IcmpTypes,
+    },
+    util, Packet,
+};
+use signal_hook::consts::{SIGINT, SIGTERM};
+use socket2::{Domain, Protocol, Socket, Type};
+use std::{
+    io,
+    net::{self, Ipv4Addr, SocketAddr},
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc,
+    },
+    thread::{self},
+    time::{Duration, Instant},
+};
+
+use crate::{config::Config, error::PingError};
+
+/// A ping session: its configuration, the shared ICMP socket and the destination.
+#[derive(Clone)]
+pub struct Ping {
+    // Parameters of this session.
+    config: Config,
+    // Socket used for sending requests and receiving replies; wrapped in `Arc`
+    // so that clones of `Ping` share the same socket.
+    socket: Arc<Socket>,
+    // Socket address the echo requests are sent to.
+    dest: SocketAddr,
+}
+
+impl Ping {
+    /// Creates a pinger from `config`.
+    ///
+    /// Opens an IPv4 datagram ICMP socket and binds it to the unspecified
+    /// address on port 12549; the destination address uses the same port number.
+    ///
+    /// # Errors
+    /// Returns the underlying I/O error when the socket cannot be created or bound.
+    pub fn new(config: Config) -> std::io::Result<Self> {
+        let socket = Socket::new(Domain::IPV4, Type::DGRAM, Some(Protocol::ICMPV4))?;
+        let src = SocketAddr::new(net::IpAddr::V4(Ipv4Addr::UNSPECIFIED), 12549);
+        let dest = SocketAddr::new(config.address.ip, 12549);
+        socket.bind(&src.into())?;
+        // NOTE(review): TTL and timeouts are deliberately left disabled here. Without a
+        // read timeout a lost reply presumably blocks `ping` forever; re-enable once supported.
+        // socket.set_ttl(64)?;
+        // socket.set_read_timeout(Some(Duration::from_secs(config.timeout)))?;
+        // socket.set_write_timeout(Some(Duration::from_secs(config.timeout)))?;
+        Ok(Self {
+            config,
+            dest,
+            socket: Arc::new(socket),
+        })
+    }
+
+    /// Sends one ICMP echo request and waits for the reply.
+    ///
+    /// `seq_offset` is added (wrapping) to the configured base sequence number.
+    /// On success one result line is printed to standard output.
+    ///
+    /// # Errors
+    /// * `PingError::InvalidConfig` when the configured packet size cannot hold an echo request.
+    /// * The I/O error when sending or receiving fails.
+    /// * `PingError::InvalidPacket` when the received bytes are not a valid echo reply.
+    pub fn ping(&self, seq_offset: u16) -> anyhow::Result<()> {
+        // Build the ICMP echo request in place.
+        let mut buf = vec![0u8; self.config.packet_size];
+        let mut icmp = MutableEchoRequestPacket::new(&mut buf[..]).ok_or_else(|| {
+            PingError::InvalidConfig(format!(
+                "packet size {} is too small for an ICMP echo request",
+                self.config.packet_size
+            ))
+        })?;
+        icmp.set_icmp_type(IcmpTypes::EchoRequest);
+        icmp.set_icmp_code(IcmpCodes::NoCode);
+        icmp.set_identifier(self.config.id);
+        // Wrapping add: the sequence number is a 16-bit counter and must not overflow-panic.
+        icmp.set_sequence_number(self.config.sequence.wrapping_add(seq_offset));
+        icmp.set_checksum(util::checksum(icmp.packet(), 1));
+
+        let start = Instant::now();
+
+        // Send the request.
+        self.socket.send_to(icmp.packet(), &self.dest.into())?;
+
+        // Receive the reply into the same buffer.
+        // SAFETY: `u8` and `MaybeUninit<u8>` have the same layout, the slice is fully
+        // initialised, and it is only handed to `recv_from`, which writes received bytes into it.
+        let mem_buf =
+            unsafe { &mut *(buf.as_mut_slice() as *mut [u8] as *mut [std::mem::MaybeUninit<u8>]) };
+        let (size, _) = self.socket.recv_from(mem_buf)?;
+
+        let duration = start.elapsed().as_micros() as f64 / 1000.0;
+        // Only the first `size` bytes belong to the reply; the rest is stale request data.
+        let received = buf.get(..size).ok_or(PingError::InvalidPacket)?;
+        let reply = EchoReplyPacket::new(received).ok_or(PingError::InvalidPacket)?;
+        // NOTE(review): the ttl printed below is the configured value, not one read from the reply.
+        println!(
+            "{} bytes from {}: icmp_seq={} ttl={} time={:.2}ms",
+            size,
+            self.config.address.ip,
+            reply.get_sequence_number(),
+            self.config.ttl,
+            duration
+        );
+
+        Ok(())
+    }
+
+    /// Runs the whole ping session.
+    ///
+    /// Spawns one thread per echo request, waiting `interval` milliseconds between
+    /// two spawns, then joins all threads and prints the statistics. A request that
+    /// fails is reported on standard error and counted as lost.
+    pub fn run(&self) -> io::Result<()> {
+        println!(
+            "PING {}({})",
+            self.config.address.raw, self.config.address.ip
+        );
+        let started = Instant::now();
+        let this = Arc::new(self.clone());
+
+        let mut handles = Vec::with_capacity(usize::from(this.config.count));
+        let mut send: u64 = 0;
+
+        for i in 0..this.config.count {
+            let worker = Arc::clone(&this);
+            // The thread returns the ping result instead of panicking on failure.
+            handles.push(thread::spawn(move || worker.ping(i)));
+            send += 1;
+            // No need to wait after the last request.
+            if i < this.config.count - 1 {
+                thread::sleep(Duration::from_millis(this.config.interval));
+            }
+        }
+
+        let mut success: u64 = 0;
+        for handle in handles {
+            match handle.join() {
+                Ok(Ok(())) => success += 1,
+                Ok(Err(e)) => eprintln!("ping failed: {}", e),
+                Err(_) => eprintln!("ping thread panicked"),
+            }
+        }
+
+        let total = started.elapsed().as_micros() as f64 / 1000.0;
+        let loss_rate = if send > 0 {
+            (send - success) * 100 / send
+        } else {
+            0
+        };
+        println!("\n--- {} ping statistics ---", self.config.address.raw);
+        println!(
+            "{} packets transmitted, {} received, {}% packet loss, time {}ms",
+            send, success, loss_rate, total,
+        );
+        Ok(())
+    }
+}
+
+//TODO: 等待添加ctrl+c发送信号后添加该特性
+// /// # 创建一个进程用于监听用户是否提前退出程序
+// fn signal_notify() -> std::io::Result<Receiver<i32>> {
+//     let (s, r) = bounded(1);
+
+//     let mut signals = signal_hook::iterator::Signals::new(&[SIGINT, SIGTERM])?;
+
+//     thread::spawn(move || {
+//         for signal in signals.forever() {
+//             s.send(signal).unwrap();
+//             break;
+//         }
+//     });
+//     Ok(r)
+// }

+ 2 - 0
user/apps/test-uevent/.cargo/config.toml

@@ -0,0 +1,2 @@
+[build]
+target = "x86_64-unknown-linux-musl"

+ 3 - 0
user/apps/test-uevent/.gitignore

@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+/install/

+ 12 - 0
user/apps/test-uevent/Cargo.toml

@@ -0,0 +1,12 @@
+[package]
+name = "test-uevent"
+version = "0.1.0"
+edition = "2021"
+description = "test for uevent"
+authors = [ "val213 <[email protected]>" ]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+netlink-sys = "0.5"
+nix = "0.24"

+ 56 - 0
user/apps/test-uevent/Makefile

@@ -0,0 +1,56 @@
+TOOLCHAIN="+nightly-2023-08-15-x86_64-unknown-linux-gnu"
+RUSTFLAGS+=""
+
+ifdef DADK_CURRENT_BUILD_DIR
+# 如果是在dadk中编译,那么安装到dadk的安装目录中
+	INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR)
+else
+# 如果是在本地编译,那么安装到当前目录下的install目录中
+	INSTALL_DIR = ./install
+endif
+
+ifeq ($(ARCH), x86_64)
+	export RUST_TARGET=x86_64-unknown-linux-musl
+else ifeq ($(ARCH), riscv64)
+	export RUST_TARGET=riscv64gc-unknown-linux-gnu
+else 
+# 默认为x86_64,用于本地编译
+	export RUST_TARGET=x86_64-unknown-linux-musl
+endif
+
+run:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)
+
+build:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)
+
+clean:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)
+
+test:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET)
+
+doc:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET)
+
+fmt:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt
+
+fmt-check:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check
+
+run-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release
+
+build-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release
+
+clean-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release
+
+test-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release
+
+.PHONY: install
+install:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force

+ 14 - 0
user/apps/test-uevent/README.md

@@ -0,0 +1,14 @@
+# DragonOS Rust-Application Template
+
+您可以使用此模板来创建DragonOS应用程序。
+
+## 使用方法
+
+1. 使用DragonOS的tools目录下的`bootstrap.sh`脚本初始化环境
+2. 在终端输入`cargo install cargo-generate`
+3. 在终端输入`cargo generate --git https://github.com/DragonOS-Community/Rust-App-Template`即可创建项目
+如果您的网络较慢,请使用镜像站`cargo generate --git https://git.mirrors.dragonos.org/DragonOS-Community/Rust-App-Template`
+4. 使用`cargo run`来运行项目
+5. 在DragonOS的`user/dadk/config`目录下,使用`dadk new`命令,创建编译配置,安装到DragonOS的`/`目录下。 
+(在dadk的编译命令选项处,请使用Makefile里面的`make install`配置进行编译、安装)
+6. 编译DragonOS即可安装

+ 150 - 0
user/apps/test-uevent/src/main.rs

@@ -0,0 +1,150 @@
+use libc::{sockaddr, sockaddr_storage, recvfrom, bind, sendto, socket, AF_NETLINK, SOCK_DGRAM, SOCK_CLOEXEC, getpid, c_void};
+use nix::libc;
+use std::os::unix::io::RawFd;
+use std::{ mem, io};
+
+// Header placed in front of every netlink message handled by this test.
+// NOTE(review): presumably mirrors the Linux `struct nlmsghdr` layout — confirm.
+#[repr(C)]
+struct Nlmsghdr {
+    // Total message length in bytes: this header plus the payload.
+    nlmsg_len: u32,
+    // The remaining fields are always sent as 0 by this program.
+    nlmsg_type: u16,
+    nlmsg_flags: u16,
+    nlmsg_seq: u32,
+    nlmsg_pid: u32,
+}
+
+/// Opens a `NETLINK_KOBJECT_UEVENT` datagram netlink socket with close-on-exec set.
+///
+/// # Errors
+/// Prints and returns the OS error when the socket cannot be created.
+fn create_netlink_socket() -> io::Result<RawFd> {
+    // SAFETY: `socket` only takes integer arguments and has no memory-safety preconditions.
+    let sockfd = unsafe {
+        socket(AF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, libc::NETLINK_KOBJECT_UEVENT)
+    };
+
+    if sockfd < 0 {
+        // Read errno exactly once, before printing: the write done by `println!`
+        // may overwrite it.
+        let err = io::Error::last_os_error();
+        println!("Error: {}", err);
+        return Err(err);
+    }
+
+    Ok(sockfd)
+}
+
+/// Binds `sock` to a netlink address whose port id is the current process id
+/// and whose multicast group mask is 0.
+///
+/// # Errors
+/// Prints and returns the OS error when `bind` fails.
+fn bind_netlink_socket(sock: RawFd) -> io::Result<()> {
+    // SAFETY: `getpid` has no preconditions.
+    let pid = unsafe { getpid() };
+    // SAFETY: `sockaddr_nl` is a plain C struct for which all-zero bytes are a valid value.
+    let mut addr: libc::sockaddr_nl = unsafe { mem::zeroed() };
+    addr.nl_family = AF_NETLINK as libc::sa_family_t;
+    addr.nl_pid = pid as u32;
+    addr.nl_groups = 0;
+
+    // SAFETY: `addr` is a live, initialised `sockaddr_nl` and the length passed matches its size.
+    let ret = unsafe {
+        bind(
+            sock,
+            &addr as *const _ as *const sockaddr,
+            mem::size_of::<libc::sockaddr_nl>() as libc::socklen_t,
+        )
+    };
+
+    if ret < 0 {
+        // Read errno exactly once, before printing: the write done by `println!`
+        // may overwrite it.
+        let err = io::Error::last_os_error();
+        println!("Error: {}", err);
+        return Err(err);
+    }
+
+    Ok(())
+}
+
+/// Sends `message` over `sock` as a single netlink message.
+///
+/// The message is a [`Nlmsghdr`] (all fields 0 except the length) followed by the
+/// raw bytes of `message`; it is addressed to netlink port id 0 with group mask 0.
+///
+/// # Errors
+/// Prints and returns the OS error when `sendto` fails.
+fn send_uevent(sock: RawFd, message: &str) -> io::Result<()> {
+    // SAFETY: `sockaddr_nl` is a plain C struct for which all-zero bytes are a valid value.
+    let mut addr: libc::sockaddr_nl = unsafe { mem::zeroed() };
+    addr.nl_family = AF_NETLINK as libc::sa_family_t;
+    addr.nl_pid = 0;
+    addr.nl_groups = 0;
+
+    let nlmsghdr = Nlmsghdr {
+        nlmsg_len: (mem::size_of::<Nlmsghdr>() + message.len()) as u32,
+        nlmsg_type: 0,
+        nlmsg_flags: 0,
+        nlmsg_seq: 0,
+        nlmsg_pid: 0,
+    };
+
+    // Serialise header and payload back to back.
+    let mut buffer = Vec::with_capacity(nlmsghdr.nlmsg_len as usize);
+    // SAFETY: `nlmsghdr` is a live `#[repr(C)]` value made only of integer fields without
+    // padding, so viewing its `size_of` bytes as `u8` is valid for the duration of the call.
+    buffer.extend_from_slice(unsafe {
+        std::slice::from_raw_parts(
+            &nlmsghdr as *const Nlmsghdr as *const u8,
+            mem::size_of::<Nlmsghdr>(),
+        )
+    });
+    buffer.extend_from_slice(message.as_bytes());
+
+    // SAFETY: `buffer` and `addr` are live for the call and the lengths passed match them.
+    let ret = unsafe {
+        sendto(
+            sock,
+            buffer.as_ptr() as *const c_void,
+            buffer.len(),
+            0,
+            &addr as *const _ as *const sockaddr,
+            mem::size_of::<libc::sockaddr_nl>() as libc::socklen_t,
+        )
+    };
+
+    if ret < 0 {
+        // Read errno exactly once, before printing: the write done by `println!`
+        // may overwrite it.
+        let err = io::Error::last_os_error();
+        println!("Error: {}", err);
+        return Err(err);
+    }
+
+    Ok(())
+}
+
+/// Receives one netlink message from `sock` and returns its payload as text.
+///
+/// The sender address is not requested. The payload is everything between the
+/// [`Nlmsghdr`] and the length announced in that header, decoded lossily as UTF-8.
+///
+/// # Errors
+/// * `InvalidInput` when `sock` is negative.
+/// * The OS error when `recvfrom` fails.
+/// * `InvalidData` when the datagram is shorter than a header, or the header
+///   announces a length that is larger than what was received or smaller than the header.
+fn receive_uevent(sock: RawFd) -> io::Result<String> {
+    // Reject an obviously invalid descriptor up front.
+    if sock < 0 {
+        println!("Invalid socket file descriptor: {}", sock);
+        return Err(io::Error::new(io::ErrorKind::InvalidInput, "Invalid socket file descriptor"));
+    }
+
+    let mut buf = [0u8; 1024];
+
+    // SAFETY: `buf` is a live, writable buffer of `buf.len()` bytes; both address
+    // out-parameters are null, which tells `recvfrom` not to report the sender.
+    let len = unsafe {
+        recvfrom(
+            sock,
+            buf.as_mut_ptr() as *mut c_void,
+            buf.len(),
+            0,
+            core::ptr::null_mut(),
+            core::ptr::null_mut(),
+        )
+    };
+    // The error check must come before `len` is used as a slice bound:
+    // a return value of -1 would otherwise turn into a huge index and panic.
+    if len < 0 {
+        let err = io::Error::last_os_error();
+        println!("Error: {}", err);
+        return Err(err);
+    }
+    let len = len as usize;
+    println!("Received {} bytes", len);
+    println!("Received message: {:?}", &buf[..len]);
+
+    let nlmsghdr_size = mem::size_of::<Nlmsghdr>();
+    if len < nlmsghdr_size {
+        println!("Received message is too short");
+        return Err(io::Error::new(io::ErrorKind::InvalidData, "Received message is too short"));
+    }
+
+    // SAFETY: at least `nlmsghdr_size` bytes of `buf` are readable and every bit pattern is a
+    // valid `Nlmsghdr`. `read_unaligned` is used because a byte array carries no alignment
+    // guarantee for a struct containing `u32` fields.
+    let nlmsghdr = unsafe { (buf.as_ptr() as *const Nlmsghdr).read_unaligned() };
+    let msg_len = nlmsghdr.nlmsg_len as usize;
+    if msg_len > len {
+        println!("Received message is incomplete");
+        return Err(io::Error::new(io::ErrorKind::InvalidData, "Received message is incomplete"));
+    }
+    // A length smaller than the header itself would make the payload range below invalid.
+    if msg_len < nlmsghdr_size {
+        println!("Received message has an invalid length");
+        return Err(io::Error::new(io::ErrorKind::InvalidData, "Received message has an invalid length"));
+    }
+
+    let message_data = &buf[nlmsghdr_size..msg_len];
+    Ok(String::from_utf8_lossy(message_data).to_string())
+}
+
+/// Exercises the uevent netlink path end to end: create, bind, send, receive.
+/// Any failing step aborts the program with a panic message.
+fn main() {
+    let created = create_netlink_socket();
+    let socket = created.expect("Failed to create Netlink socket");
+    println!("Netlink socket created successfully");
+
+    let bound = bind_netlink_socket(socket);
+    bound.expect("Failed to bind Netlink socket");
+    println!("Netlink socket created and bound successfully");
+
+    let sent = send_uevent(socket, "add@/devices/virtual/block/loop0");
+    sent.expect("Failed to send uevent message");
+    println!("Custom uevent message sent successfully");
+
+    let received = receive_uevent(socket);
+    let message = received.expect("Failed to receive uevent message");
+    println!("Received uevent message: {}", message);
+}

+ 3 - 0
user/apps/test_seqpacket/.gitignore

@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+/install/

+ 12 - 0
user/apps/test_seqpacket/Cargo.toml

@@ -0,0 +1,12 @@
+[package]
+name = "test_seqpacket"
+version = "0.1.0"
+edition = "2021"
+description = "测试seqpacket的socket"
+authors = [ "Saga <[email protected]>" ]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nix = "0.26"
+libc = "0.2"

+ 56 - 0
user/apps/test_seqpacket/Makefile

@@ -0,0 +1,56 @@
+TOOLCHAIN=
+RUSTFLAGS=
+
+ifdef DADK_CURRENT_BUILD_DIR
+# 如果是在dadk中编译,那么安装到dadk的安装目录中
+	INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR)
+else
+# 如果是在本地编译,那么安装到当前目录下的install目录中
+	INSTALL_DIR = ./install
+endif
+
+ifeq ($(ARCH), x86_64)
+	export RUST_TARGET=x86_64-unknown-linux-musl
+else ifeq ($(ARCH), riscv64)
+	export RUST_TARGET=riscv64gc-unknown-linux-gnu
+else 
+# 默认为x86_86,用于本地编译
+	export RUST_TARGET=x86_64-unknown-linux-musl
+endif
+
+run:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)
+
+build:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)
+
+clean:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)
+
+test:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET)
+
+doc:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET)
+
+fmt:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt
+
+fmt-check:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check
+
+run-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release
+
+build-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release
+
+clean-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release
+
+test-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release
+
+.PHONY: install
+install:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force

+ 14 - 0
user/apps/test_seqpacket/README.md

@@ -0,0 +1,14 @@
+# DragonOS Rust-Application Template
+
+您可以使用此模板来创建DragonOS应用程序。
+
+## 使用方法
+
+1. 使用DragonOS的tools目录下的`bootstrap.sh`脚本初始化环境
+2. 在终端输入`cargo install cargo-generate`
+3. 在终端输入`cargo generate --git https://github.com/DragonOS-Community/Rust-App-Template`即可创建项目
+如果您的网络较慢,请使用镜像站`cargo generate --git https://git.mirrors.dragonos.org/DragonOS-Community/Rust-App-Template`
+4. 使用`cargo run`来运行项目
+5. 在DragonOS的`user/dadk/config`目录下,使用`dadk new`命令,创建编译配置,安装到DragonOS的`/`目录下。 
+(在dadk的编译命令选项处,请使用Makefile里面的`make install`配置进行编译、安装)
+6. 编译DragonOS即可安装

+ 190 - 0
user/apps/test_seqpacket/src/main.rs

@@ -0,0 +1,190 @@
+mod seq_socket;
+mod seq_pair;
+
+use seq_socket::test_seq_socket;
+use seq_pair::test_seq_pair;
+
+/// Runs both SEQPACKET tests and prints one status line per test.
+///
+/// A failing test is only reported; the program itself always returns `Ok(())`.
+fn main() -> Result<(), std::io::Error> {
+    match test_seq_socket() {
+        Ok(()) => println!("[success] test_seq_socket"),
+        Err(e) => println!("[ fault ] test_seq_socket, err: {}", e),
+    }
+
+    match test_seq_pair() {
+        Ok(()) => println!("[success] test_seq_pair"),
+        Err(e) => println!("[ fault ] test_seq_pair, err: {}", e),
+    }
+
+    Ok(())
+}
+
+// use nix::sys::socket::{socketpair, AddressFamily, SockFlag, SockType};
+// use std::fs::File;
+// use std::io::{Read, Write};
+// use std::os::fd::FromRawFd;
+// use std::{fs, str};
+
+// use libc::*;
+// use std::ffi::CString;
+// use std::io::Error;
+// use std::mem;
+// use std::os::unix::io::RawFd;
+// use std::ptr;
+
+// const SOCKET_PATH: &str = "/test.seqpacket";
+// const MSG: &str = "Hello, Unix SEQPACKET socket!";
+
+// fn create_seqpacket_socket() -> Result<RawFd, Error> {
+//     unsafe {
+//         let fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+//         if fd == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//         Ok(fd)
+//     }
+// }
+
+// fn bind_socket(fd: RawFd) -> Result<(), Error> {
+//     unsafe {
+//         let mut addr = sockaddr_un {
+//             sun_family: AF_UNIX as u16,
+//             sun_path: [0; 108],
+//         };
+//         let path_cstr = CString::new(SOCKET_PATH).unwrap();
+//         let path_bytes = path_cstr.as_bytes();
+//         for (i, &byte) in path_bytes.iter().enumerate() {
+//             addr.sun_path[i] = byte as i8;
+//         }
+
+//         if bind(fd, &addr as *const _ as *const sockaddr, mem::size_of_val(&addr) as socklen_t) == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//     }
+//     Ok(())
+// }
+
+// fn listen_socket(fd: RawFd) -> Result<(), Error> {
+//     unsafe {
+//         if listen(fd, 5) == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//     }
+//     Ok(())
+// }
+
+// fn accept_connection(fd: RawFd) -> Result<RawFd, Error> {
+//     unsafe {
+//         // let mut addr = sockaddr_un {
+//         //     sun_family: AF_UNIX as u16,
+//         //     sun_path: [0; 108],
+//         // };
+//         // let mut len = mem::size_of_val(&addr) as socklen_t;
+//         let client_fd = accept(fd, std::ptr::null_mut(), std::ptr::null_mut());
+//         if client_fd == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//         Ok(client_fd)
+//     }
+// }
+
+// fn send_message(fd: RawFd, msg: &str) -> Result<(), Error> {
+//     unsafe {
+//         let msg_bytes = msg.as_bytes();
+//         if send(fd, msg_bytes.as_ptr() as *const libc::c_void, msg_bytes.len(), 0) == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//     }
+//     Ok(())
+// }
+
+// fn receive_message(fd: RawFd) -> Result<String, Error> {
+//     let mut buffer = [0; 1024];
+//     unsafe {
+//         let len = recv(fd, buffer.as_mut_ptr() as *mut libc::c_void, buffer.len(), 0);
+//         if len == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//         Ok(String::from_utf8_lossy(&buffer[..len as usize]).into_owned())
+//     }
+// }
+// fn main() -> Result<(), Error> {
+//     // Create and bind the server socket
+//     fs::remove_file(&SOCKET_PATH).ok();
+
+//     let server_fd = create_seqpacket_socket()?;
+//     bind_socket(server_fd)?;
+//     listen_socket(server_fd)?;
+
+//     // Accept connection in a separate thread
+//     let server_thread = std::thread::spawn(move || {
+//         let client_fd = accept_connection(server_fd).expect("Failed to accept connection");
+
+//         // Receive and print message
+//         let received_msg = receive_message(client_fd).expect("Failed to receive message");
+//         println!("Server: Received message: {}", received_msg);
+
+//         // Close client connection
+//         unsafe { close(client_fd) };
+//     });
+
+//     // Create and connect the client socket
+//     let client_fd = create_seqpacket_socket()?;
+//     unsafe {
+//         let mut addr = sockaddr_un {
+//             sun_family: AF_UNIX as u16,
+//             sun_path: [0; 108],
+//         };
+//         let path_cstr = CString::new(SOCKET_PATH).unwrap();
+//         let path_bytes = path_cstr.as_bytes();
+//         // Convert u8 to i8
+//         for (i, &byte) in path_bytes.iter().enumerate() {
+//             addr.sun_path[i] = byte as i8;
+//         }
+//         if connect(client_fd, &addr as *const _ as *const sockaddr, mem::size_of_val(&addr) as socklen_t) == -1 {
+//             return Err(Error::last_os_error());
+//         }
+//     }
+//     send_message(client_fd, MSG)?;
+
+//     // Close client connection
+//     unsafe { close(client_fd) };
+
+//     // Wait for server thread to complete
+//     server_thread.join().expect("Server thread panicked");
+//     fs::remove_file(&SOCKET_PATH).ok();
+
+//         // 创建 socket pair
+//     let (sock1, sock2) = socketpair(
+//         AddressFamily::Unix,
+//         SockType::SeqPacket, // 使用 SeqPacket 类型
+//         None,                // 协议默认
+//         SockFlag::empty(),
+//     ).expect("Failed to create socket pair");
+
+//     let mut socket1 = unsafe { File::from_raw_fd(sock1) };
+//     let mut socket2 = unsafe { File::from_raw_fd(sock2) };
+//     // sock1 写入数据
+//     let msg = b"hello from sock1";
+//     socket1.write_all(msg)?;
+//     println!("sock1 send: {:?}", String::from_utf8_lossy(&msg[..]));
+
+//     // 因os read和write时会调整file的offset,write会对offset和meta size(目前返回的都是0)进行比较,
+//     // 而read不会,故双socket都先send,后recv
+
+//     // sock2 回复数据
+//     let reply = b"hello from sock2";
+//     socket2.write_all(reply)?;
+//     println!("sock2 send: {:?}", String::from_utf8_lossy(reply));
+
+//     // sock2 读取数据
+//     let mut buf = [0u8; 128];
+//     let len = socket2.read(&mut buf)?;
+//     println!("sock2 receive: {:?}", String::from_utf8_lossy(&buf[..len]));
+
+//     // sock1 读取回复
+//     let len = socket1.read(&mut buf)?;
+//     println!("sock1 receive: {:?}", String::from_utf8_lossy(&buf[..len]));
+//     Ok(())
+// }

+ 39 - 0
user/apps/test_seqpacket/src/seq_pair.rs

@@ -0,0 +1,39 @@
+use nix::sys::socket::{socketpair, AddressFamily, SockFlag, SockType};
+use std::fs::File;
+use std::io::{Read, Write,Error};
+use std::os::fd::FromRawFd;
+
+/// Tests a Unix `SOCK_SEQPACKET` socket pair: each end sends one message and
+/// then reads the message sent by the other end.
+///
+/// # Errors
+/// Returns the OS error when the pair cannot be created, or the I/O error of
+/// the first failing read or write.
+pub fn test_seq_pair() -> Result<(), Error> {
+    // Create the socket pair; report failure to the caller instead of panicking.
+    let (sock1, sock2) = socketpair(
+        AddressFamily::Unix,
+        SockType::SeqPacket, // SEQPACKET type
+        None,                // default protocol
+        SockFlag::empty(),
+    )
+    .map_err(|e| Error::from_raw_os_error(e as i32))?;
+
+    // SAFETY: both descriptors were just returned by `socketpair` and are owned
+    // exclusively by the `File`s created here, which close them on drop.
+    let mut socket1 = unsafe { File::from_raw_fd(sock1) };
+    let mut socket2 = unsafe { File::from_raw_fd(sock2) };
+
+    // sock1 sends its message.
+    let msg = b"hello from sock1";
+    socket1.write_all(msg)?;
+    println!("sock1 send: {:?}", String::from_utf8_lossy(&msg[..]));
+
+    // The OS adjusts the file offset on read and write; write compares the offset with
+    // the metadata size (currently always 0) while read does not, so both sockets
+    // send first and receive afterwards.
+
+    // sock2 sends its reply.
+    let reply = b"hello from sock2";
+    socket2.write_all(reply)?;
+    println!("sock2 send: {:?}", String::from_utf8_lossy(reply));
+
+    // sock2 reads the message from sock1.
+    let mut buf = [0u8; 128];
+    let len = socket2.read(&mut buf)?;
+    println!("sock2 receive: {:?}", String::from_utf8_lossy(&buf[..len]));
+
+    // sock1 reads the reply from sock2.
+    let len = socket1.read(&mut buf)?;
+    println!("sock1 receive: {:?}", String::from_utf8_lossy(&buf[..len]));
+    Ok(())
+}

+ 155 - 0
user/apps/test_seqpacket/src/seq_socket.rs

@@ -0,0 +1,155 @@
+
+use libc::*;
+use std::{fs, str};
+use std::ffi::CString;
+use std::io::Error;
+use std::mem;
+use std::os::unix::io::RawFd;
+
+const SOCKET_PATH: &str = "/test.seqpacket";
+const MSG1: &str = "Hello, Unix SEQPACKET socket from Client!";
+const MSG2: &str = "Hello, Unix SEQPACKET socket from Server!";
+
+
+/// Creates a Unix-domain `SOCK_SEQPACKET` socket and returns its descriptor,
+/// or the OS error when `socket` reports -1.
+fn create_seqpacket_socket() -> Result<RawFd, Error> {
+    let raw = unsafe { socket(AF_UNIX, SOCK_SEQPACKET, 0) };
+    match raw {
+        -1 => Err(Error::last_os_error()),
+        fd => Ok(fd),
+    }
+}
+
+/// Binds `fd` to the Unix socket path `SOCKET_PATH`.
+///
+/// # Errors
+/// Returns `InvalidInput` when the path does not fit into `sun_path` (with room
+/// for the terminating NUL), or the OS error when `bind` fails.
+fn bind_socket(fd: RawFd) -> Result<(), Error> {
+    let path_cstr = CString::new(SOCKET_PATH).expect("SOCKET_PATH contains no NUL byte");
+    let path_bytes = path_cstr.as_bytes();
+
+    let mut addr = sockaddr_un {
+        sun_family: AF_UNIX as sa_family_t,
+        sun_path: [0; 108],
+    };
+    if path_bytes.len() >= addr.sun_path.len() {
+        return Err(Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "socket path is too long",
+        ));
+    }
+    // `c_char` is `i8` on some targets and `u8` on others (e.g. riscv64),
+    // so cast to the alias instead of a fixed integer type.
+    for (slot, &byte) in addr.sun_path.iter_mut().zip(path_bytes) {
+        *slot = byte as c_char;
+    }
+
+    // SAFETY: `addr` is a live, initialised `sockaddr_un` and the length passed matches its size.
+    let rc = unsafe {
+        bind(
+            fd,
+            &addr as *const _ as *const sockaddr,
+            mem::size_of_val(&addr) as socklen_t,
+        )
+    };
+    if rc == -1 {
+        return Err(Error::last_os_error());
+    }
+    Ok(())
+}
+
+fn listen_socket(fd: RawFd) -> Result<(), Error> {
+    unsafe {
+        if listen(fd, 5) == -1 {
+            return Err(Error::last_os_error());
+        }
+    }
+    Ok(())
+}
+
+/// Accepts one connection on the listening socket `fd` and returns the new
+/// descriptor, or the OS error when `accept` reports -1.
+///
+/// The peer address is not requested: both address arguments are null.
+fn accept_connection(fd: RawFd) -> Result<RawFd, Error> {
+    let accepted = unsafe { accept(fd, std::ptr::null_mut(), std::ptr::null_mut()) };
+    match accepted {
+        -1 => Err(Error::last_os_error()),
+        client_fd => Ok(client_fd),
+    }
+}
+
+/// Sends the bytes of `msg` over `fd` with a single `send` call (flags 0);
+/// returns the OS error when `send` reports -1.
+fn send_message(fd: RawFd, msg: &str) -> Result<(), Error> {
+    let payload = msg.as_bytes();
+    let sent = unsafe { send(fd, payload.as_ptr() as *const libc::c_void, payload.len(), 0) };
+    match sent {
+        -1 => Err(Error::last_os_error()),
+        _ => Ok(()),
+    }
+}
+
+/// Receives up to 1024 bytes from `fd` with a single `recv` call (flags 0) and
+/// returns them decoded lossily as UTF-8; returns the OS error when `recv` reports -1.
+fn receive_message(fd: RawFd) -> Result<String, Error> {
+    let mut buffer = [0; 1024];
+    let received = unsafe { recv(fd, buffer.as_mut_ptr() as *mut libc::c_void, buffer.len(), 0) };
+    match received {
+        -1 => Err(Error::last_os_error()),
+        len => Ok(String::from_utf8_lossy(&buffer[..len as usize]).into_owned()),
+    }
+}
+
+/// Tests a Unix `SOCK_SEQPACKET` client/server exchange over `SOCKET_PATH`.
+///
+/// A server thread accepts one connection, prints the client's message and answers
+/// with `MSG2`. The client connects, sends `MSG1`, prints the answer and the peer
+/// path, waits for the server thread and reads once more before closing.
+///
+/// # Errors
+/// Returns the OS error of the first failing socket operation on the client side.
+///
+/// # Panics
+/// Panics when the server thread itself panicked.
+pub fn test_seq_socket() -> Result<(), Error> {
+    // Create and bind the server socket
+    fs::remove_file(&SOCKET_PATH).ok();
+
+    let server_fd = create_seqpacket_socket()?;
+    bind_socket(server_fd)?;
+    listen_socket(server_fd)?;
+
+    // Accept connection in a separate thread
+    let server_thread = std::thread::spawn(move || {
+        let client_fd = accept_connection(server_fd).expect("Failed to accept connection");
+
+        // Receive and print message
+        let received_msg = receive_message(client_fd).expect("Failed to receive message");
+        println!("Server: Received message: {}", received_msg);
+
+        send_message(client_fd, MSG2).expect("Failed to send message");
+
+        // Close client connection
+        unsafe { close(client_fd) };
+    });
+
+    // Create and connect the client socket
+    let client_fd = create_seqpacket_socket()?;
+    {
+        let mut addr = sockaddr_un {
+            sun_family: AF_UNIX as sa_family_t,
+            sun_path: [0; 108],
+        };
+        let path_cstr = CString::new(SOCKET_PATH).expect("SOCKET_PATH contains no NUL byte");
+        // `c_char` is `i8` on some targets and `u8` on others (e.g. riscv64),
+        // so cast to the alias instead of a fixed integer type.
+        for (slot, &byte) in addr.sun_path.iter_mut().zip(path_cstr.as_bytes()) {
+            *slot = byte as c_char;
+        }
+        // SAFETY: `addr` is a live, initialised `sockaddr_un` and the length passed matches its size.
+        let rc = unsafe {
+            connect(
+                client_fd,
+                &addr as *const _ as *const sockaddr,
+                mem::size_of_val(&addr) as socklen_t,
+            )
+        };
+        if rc == -1 {
+            return Err(Error::last_os_error());
+        }
+    }
+    send_message(client_fd, MSG1)?;
+    let received_msg = receive_message(client_fd)?;
+    println!("Client: Received message: {}", received_msg);
+
+    // Query and print the peer name
+    {
+        let mut addrss = sockaddr_un {
+            sun_family: AF_UNIX as sa_family_t,
+            sun_path: [0; 108],
+        };
+        let mut len = mem::size_of_val(&addrss) as socklen_t;
+        // SAFETY: `addrss` and `len` are live and `len` holds the size of `addrss`.
+        let res = unsafe {
+            getpeername(client_fd, &mut addrss as *mut _ as *mut sockaddr, &mut len)
+        };
+        if res == -1 {
+            return Err(Error::last_os_error());
+        }
+        // Stop at the first NUL so the zero padding of `sun_path` is not printed.
+        let peer_path: Vec<u8> = addrss
+            .sun_path
+            .iter()
+            .map(|&x| x as u8)
+            .take_while(|&b| b != 0)
+            .collect();
+        println!("Client: Connected to server at path: {}", String::from_utf8_lossy(&peer_path));
+    }
+
+    server_thread.join().expect("Server thread panicked");
+    // The listening socket is no longer needed once the server thread is done.
+    unsafe { close(server_fd) };
+
+    // NOTE(review): the server sends a single message and then closes, so this second
+    // read presumably observes end-of-stream (an empty message) — confirm intent.
+    let received_msg = receive_message(client_fd)?;
+    println!("Client: Received message: {}", received_msg);
+    // Close client connection
+    unsafe { close(client_fd) };
+    fs::remove_file(&SOCKET_PATH).ok();
+    Ok(())
+}

+ 3 - 0
user/apps/test_unix_stream_socket/.gitignore

@@ -0,0 +1,3 @@
+/target
+Cargo.lock
+/install/

+ 11 - 0
user/apps/test_unix_stream_socket/Cargo.toml

@@ -0,0 +1,11 @@
+[package]
+name = "test_unix_stream_socket"
+version = "0.1.0"
+edition = "2021"
+description = "test for unix stream socket"
+authors = [ "smallcjy <[email protected]>" ]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+libc = "0.2.158"

+ 56 - 0
user/apps/test_unix_stream_socket/Makefile

@@ -0,0 +1,56 @@
+TOOLCHAIN=
+RUSTFLAGS=
+
+ifdef DADK_CURRENT_BUILD_DIR
+# When building under dadk, install into dadk's build directory
+	INSTALL_DIR = $(DADK_CURRENT_BUILD_DIR)
+else
+# When building locally, install into the ./install directory under the current directory
+	INSTALL_DIR = ./install
+endif
+
+ifeq ($(ARCH), x86_64)
+	export RUST_TARGET=x86_64-unknown-linux-musl
+else ifeq ($(ARCH), riscv64)
+	export RUST_TARGET=riscv64gc-unknown-linux-gnu
+else 
+# Default to x86_64, used for local builds
+	export RUST_TARGET=x86_64-unknown-linux-musl
+endif
+
+run:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET)
+
+build:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET)
+
+clean:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET)
+
+test:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET)
+
+doc:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) doc --target $(RUST_TARGET)
+
+fmt:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt
+
+fmt-check:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) fmt --check
+
+run-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) run --target $(RUST_TARGET) --release
+
+build-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) build --target $(RUST_TARGET) --release
+
+clean-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) clean --target $(RUST_TARGET) --release
+
+test-release:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) test --target $(RUST_TARGET) --release
+
+.PHONY: install
+install:
+	RUSTFLAGS=$(RUSTFLAGS) cargo $(TOOLCHAIN) install --target $(RUST_TARGET) --path . --no-track --root $(INSTALL_DIR) --force

+ 5 - 0
user/apps/test_unix_stream_socket/README.md

@@ -0,0 +1,5 @@
+# unix stream socket 测试程序
+
+## 测试思路
+
+跨线程通信:一个线程作为服务端,在测试用的 socket 文件上监听;另一个线程作为客户端,连接该 socket 文件。连接成功后,验证双方能够正常收发消息。

+ 153 - 0
user/apps/test_unix_stream_socket/src/main.rs

@@ -0,0 +1,153 @@
+use std::io::Error;
+use std::os::fd::RawFd;
+use std::fs;
+use libc::*;
+use std::ffi::CString;
+use std::mem;
+
+const SOCKET_PATH: &str = "/test.stream";
+const MSG1: &str = "Hello, unix stream socket from Client!";
+const MSG2: &str = "Hello, unix stream socket from Server!";
+
+/// Creates an `AF_UNIX` / `SOCK_STREAM` socket and returns its raw descriptor.
+///
+/// # Errors
+/// Returns the last OS error when `socket` reports failure (`-1`).
+fn create_stream_socket() -> Result<RawFd, Error> {
+    // SAFETY: `socket` only takes plain integer arguments.
+    let sock = unsafe { socket(AF_UNIX, SOCK_STREAM, 0) };
+    match sock {
+        -1 => Err(Error::last_os_error()),
+        fd => Ok(fd),
+    }
+}
+
+/// Binds `fd` to the unix-domain address `SOCKET_PATH`.
+///
+/// # Errors
+/// Returns `InvalidInput` when `SOCKET_PATH` does not fit into `sun_path`
+/// together with a terminating NUL byte, or the last OS error when `bind`
+/// reports failure (`-1`).
+///
+/// # Panics
+/// Panics if `SOCKET_PATH` contains an interior NUL byte.
+fn bind_socket(fd: RawFd) -> Result<(), Error> {
+    let mut addr = sockaddr_un {
+        sun_family: AF_UNIX as sa_family_t,
+        sun_path: [0; 108],
+    };
+    let path_cstr = CString::new(SOCKET_PATH).expect("SOCKET_PATH must not contain NUL bytes");
+    let path_bytes = path_cstr.as_bytes();
+    // Keep at least one zero byte at the end so the path stays NUL-terminated.
+    if path_bytes.len() >= addr.sun_path.len() {
+        return Err(Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "socket path is too long for sun_path",
+        ));
+    }
+    for (slot, &byte) in addr.sun_path.iter_mut().zip(path_bytes) {
+        // `c_char` is `i8` on x86_64 but `u8` on riscv64, so cast to the alias
+        // instead of a fixed integer type.
+        *slot = byte as c_char;
+    }
+
+    // SAFETY: `addr` is a fully initialised `sockaddr_un` that outlives the
+    // call, and the length passed is exactly its size.
+    let rc = unsafe {
+        bind(
+            fd,
+            &addr as *const sockaddr_un as *const sockaddr,
+            mem::size_of_val(&addr) as socklen_t,
+        )
+    };
+    if rc == -1 {
+        return Err(Error::last_os_error());
+    }
+
+    Ok(())
+}
+
+/// Marks `fd` as a listening socket with a backlog of 5.
+///
+/// # Errors
+/// Returns the last OS error when `listen` reports failure (`-1`).
+fn listen_socket(fd: RawFd) -> Result<(), Error> {
+    // SAFETY: `listen` only takes plain integer arguments.
+    match unsafe { listen(fd, 5) } {
+        -1 => Err(Error::last_os_error()),
+        _ => Ok(()),
+    }
+}
+
+/// Accepts one connection on the listening socket `fd` and returns the
+/// descriptor of the accepted connection.
+///
+/// The peer address is not requested (both address arguments are null).
+///
+/// # Errors
+/// Returns the last OS error when `accept` reports failure (`-1`).
+fn accept_conn(fd: RawFd) -> Result<RawFd, Error> {
+    // SAFETY: null address/length pointers are passed as a pair, so `accept`
+    // is not asked to write a peer address anywhere.
+    let accepted = unsafe { accept(fd, std::ptr::null_mut(), std::ptr::null_mut()) };
+    match accepted {
+        -1 => Err(Error::last_os_error()),
+        conn_fd => Ok(conn_fd),
+    }
+}
+
+/// Sends the whole of `msg` over the socket `fd`.
+///
+/// `send` on a stream socket may accept fewer bytes than requested, so the
+/// call is repeated on the unsent tail until every byte has been handed over.
+///
+/// # Errors
+/// Returns the last OS error when `send` reports failure (`-1`), or
+/// `WriteZero` if `send` accepts no bytes while data is still pending.
+fn send_message(fd: RawFd, msg: &str) -> Result<(), Error> {
+    let mut pending = msg.as_bytes();
+    while !pending.is_empty() {
+        // SAFETY: the pointer/length pair describes the live `pending` slice.
+        let sent = unsafe {
+            send(fd, pending.as_ptr() as *const libc::c_void, pending.len(), 0)
+        };
+        if sent == -1 {
+            return Err(Error::last_os_error());
+        }
+        if sent == 0 {
+            // Guard against spinning forever if no progress is made.
+            return Err(Error::new(
+                std::io::ErrorKind::WriteZero,
+                "send accepted no bytes",
+            ));
+        }
+        // `sent` is positive here and never exceeds `pending.len()`.
+        pending = &pending[sent as usize..];
+    }
+    Ok(())
+}
+
+/// Performs a single `recv` of up to 1024 bytes on `fd` and returns the
+/// received bytes as text, replacing invalid UTF-8 sequences lossily.
+///
+/// # Errors
+/// Returns the last OS error when `recv` reports failure (`-1`).
+fn recv_message(fd: RawFd) -> Result<String, Error> {
+    let mut chunk = [0u8; 1024];
+    // SAFETY: the pointer/length pair describes the live, writable `chunk`.
+    let received = unsafe {
+        recv(fd, chunk.as_mut_ptr() as *mut libc::c_void, chunk.len(), 0)
+    };
+    if received == -1 {
+        return Err(Error::last_os_error());
+    }
+    let text = String::from_utf8_lossy(&chunk[..received as usize]);
+    Ok(text.into_owned())
+}
+
+/// Runs the unix stream socket round trip between two threads.
+///
+/// A server thread accepts one connection on `SOCKET_PATH`, reads one message
+/// and answers with `MSG2`. The calling thread acts as the client: it
+/// connects, sends `MSG1`, prints the peer address, waits for the server
+/// thread and finally reads the reply.
+///
+/// # Errors
+/// Returns the OS error of the first failing socket call made on the calling
+/// thread.
+///
+/// # Panics
+/// Panics if the server thread panicked or the final receive fails.
+fn test_stream() -> Result<(), Error> {
+    // Remove a stale socket file from an earlier run; a missing file is fine.
+    fs::remove_file(&SOCKET_PATH).ok();
+
+    let server_fd = create_stream_socket()?;
+    if let Err(err) = bind_socket(server_fd).and_then(|()| listen_socket(server_fd)) {
+        // SAFETY: `server_fd` is an open descriptor owned by this function.
+        unsafe { close(server_fd) };
+        fs::remove_file(&SOCKET_PATH).ok();
+        return Err(err);
+    }
+
+    let server_thread = std::thread::spawn(move || {
+        let client_fd = accept_conn(server_fd).expect("Failed to accept connection");
+        println!("accept success!");
+        let recv_msg = recv_message(client_fd).expect("Failed to receive message");
+
+        println!("Server: Received message: {}", recv_msg);
+        send_message(client_fd, MSG2).expect("Failed to send message");
+        println!("Server send finish");
+
+        // SAFETY: both descriptors are open and owned by this thread.
+        unsafe {
+            close(client_fd);
+            // The listening socket is no longer needed once the single
+            // connection has been served.
+            close(server_fd);
+        }
+    });
+
+    let outcome = create_stream_socket().and_then(|client_fd| {
+        let result = run_stream_client(client_fd, server_thread);
+        // Close the client socket on success and on every error path.
+        // SAFETY: `client_fd` is an open descriptor owned by this closure.
+        unsafe { close(client_fd) };
+        result
+    });
+    fs::remove_file(&SOCKET_PATH).ok();
+
+    outcome
+}
+
+/// Client half of `test_stream`: connects `client_fd` to `SOCKET_PATH`, sends
+/// `MSG1`, prints the peer path, joins `server_thread` and prints the reply.
+fn run_stream_client(
+    client_fd: RawFd,
+    server_thread: std::thread::JoinHandle<()>,
+) -> Result<(), Error> {
+    let mut addr = sockaddr_un {
+        sun_family: AF_UNIX as sa_family_t,
+        sun_path: [0; 108],
+    };
+    let path_cstr = CString::new(SOCKET_PATH).expect("SOCKET_PATH must not contain NUL bytes");
+    for (i, &byte) in path_cstr.as_bytes().iter().enumerate() {
+        // `c_char` is `i8` on x86_64 but `u8` on riscv64, so cast to the alias.
+        addr.sun_path[i] = byte as c_char;
+    }
+
+    // SAFETY: `addr` is a fully initialised `sockaddr_un` that outlives the
+    // call, and the length passed is exactly its size.
+    let rc = unsafe {
+        connect(
+            client_fd,
+            &addr as *const sockaddr_un as *const sockaddr,
+            mem::size_of_val(&addr) as socklen_t,
+        )
+    };
+    if rc == -1 {
+        return Err(Error::last_os_error());
+    }
+
+    send_message(client_fd, MSG1)?;
+
+    // get peer_name
+    let mut peer = sockaddr_un {
+        sun_family: AF_UNIX as sa_family_t,
+        sun_path: [0; 108],
+    };
+    let mut peer_len = mem::size_of_val(&peer) as socklen_t;
+    // SAFETY: `peer` and `peer_len` are live, writable and consistent in size.
+    let rc = unsafe {
+        getpeername(
+            client_fd,
+            &mut peer as *mut sockaddr_un as *mut sockaddr,
+            &mut peer_len,
+        )
+    };
+    if rc == -1 {
+        return Err(Error::last_os_error());
+    }
+    // Stop at the first NUL so the zero padding of `sun_path` is not printed.
+    let peer_path: Vec<u8> = peer
+        .sun_path
+        .iter()
+        .map(|&c| c as u8)
+        .take_while(|&b| b != 0)
+        .collect();
+    println!("Client: Connected to server at path: {}", String::from_utf8_lossy(&peer_path));
+
+    server_thread.join().expect("Server thread panicked");
+    println!("Client try recv!");
+    let recv_msg = recv_message(client_fd).expect("Failed to receive message from server");
+    println!("Client Received message: {}", recv_msg);
+
+    Ok(())
+}
+
+/// Entry point: runs the stream socket test and reports the outcome.
+///
+/// On failure the underlying error is printed and the process exits with
+/// status 1, so callers can detect the failure from the exit code.
+fn main() {
+    match test_stream() {
+        Ok(()) => println!("test for unix stream success"),
+        Err(err) => {
+            println!("test for unix stream failed: {}", err);
+            std::process::exit(1);
+        }
+    }
+}

+ 24 - 0
user/dadk/config/ping_0_1_0.dadk

@@ -0,0 +1,24 @@
+{
+  "name": "ping",
+  "version": "0.1.0",
+  "description": "ping用户程序",
+  "task_type": {
+    "BuildFromSource": {
+      "Local": {
+        "path": "apps/ping"
+      }
+    }
+  },
+  "depends": [],
+  "build": {
+    "build_command": "make install"
+  },
+  "install": {
+    "in_dragonos_path": "/usr"
+  },
+  "clean": {
+    "clean_command": "make clean"
+  },
+  "envs": [],
+  "target_arch": ["x86_64"]
+}

+ 29 - 0
user/dadk/config/test_seqpacket_0_1_0.dadk

@@ -0,0 +1,29 @@
+{
+  "name": "test_seqpacket",
+  "version": "0.1.0",
+  "description": "对seqpacket_pair的简单测试",
+  "rust_target": null,
+  "task_type": {
+    "BuildFromSource": {
+      "Local": {
+        "path": "apps/test_seqpacket"
+      }
+    }
+  },
+  "depends": [],
+  "build": {
+    "build_command": "make install"
+  },
+  "install": {
+    "in_dragonos_path": "/"
+  },
+  "clean": {
+    "clean_command": "make clean"
+  },
+  "envs": [],
+  "build_once": false,
+  "install_once": false,
+  "target_arch": [
+    "x86_64"
+  ]
+}

+ 29 - 0
user/dadk/config/test_stream_socket_0_1_0.dadk

@@ -0,0 +1,29 @@
+{
+  "name": "test_stream_socket",
+  "version": "0.1.0",
+  "description": "test for unix stream socket",
+  "rust_target": null,
+  "task_type": {
+    "BuildFromSource": {
+      "Local": {
+        "path": "apps/test_unix_stream_socket"
+      }
+    }
+  },
+  "depends": [],
+  "build": {
+    "build_command": "make install"
+  },
+  "install": {
+    "in_dragonos_path": "/"
+  },
+  "clean": {
+    "clean_command": "make clean"
+  },
+  "envs": [],
+  "build_once": false,
+  "install_once": false,
+  "target_arch": [
+    "x86_64"
+  ]
+}

Some files were not shown because too many files changed in this diff