Add tool: netstat.

Including libraries: libutil, libmemstat, libxo.
logwang 2017-08-31 21:34:50 +08:00
parent eec9e77f67
commit 1eaf0ac36a
542 changed files with 116527 additions and 14 deletions


@@ -115,6 +115,17 @@ SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
VNET_DEFINE(struct rtstat, rtstat);
#define V_rtstat VNET(rtstat)
#ifdef FSTACK
static int
sysctl_rtstat(SYSCTL_HANDLER_ARGS)
{
return (SYSCTL_OUT(req, &V_rtstat, sizeof(struct rtstat)));
}
SYSCTL_PROC(_net, OID_AUTO, rtstat, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD,
0, 0, sysctl_rtstat, "S,rtstat", "Routing statistics.");
#endif
VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)
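This hunk exposes the per-VNET routing statistics (V_rtstat) through a net.rtstat sysctl node so the userland netstat tool can read them. Below is a minimal, hypothetical sketch of how a tool could fetch the structure, assuming a standard sysctlbyname(3)-style interface is available to it; the field names come from struct rtstat in <net/route.h>:
```
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/route.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical helper: read the routing statistics exported above. */
static int
print_rtstat(void)
{
    struct rtstat rts;
    size_t len = sizeof(rts);

    memset(&rts, 0, sizeof(rts));
    if (sysctlbyname("net.rtstat", &rts, &len, NULL, 0) < 0)
        return (-1);
    printf("bad redirects:  %lu\n", (unsigned long)rts.rts_badredirect);
    printf("dynamic routes: %lu\n", (unsigned long)rts.rts_dynamic);
    return (0);
}
```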


@@ -271,6 +271,7 @@ NETINET_SRCS+= \
ip_icmp.c \
ip_id.c \
ip_input.c \
ip_mroute.c \
ip_options.c \
ip_output.c \
raw_ip.c \


@@ -104,6 +104,8 @@ SYSINIT(configure3, SI_SUB_CONFIGURE, SI_ORDER_ANY, configure_final, NULL);
volatile int ticks;
int cpu_disable_deep_sleep;
static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);
/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;
@@ -114,6 +116,31 @@ int mp_maxcpus = MAXCPU;
volatile int smp_started;
u_int mp_maxid;
static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
"Kernel SMP");
SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
"Max CPU ID.");
SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
0, "Max number of CPUs that the system was compiled for.");
SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0,
sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode");
int smp_disabled = 0; /* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
&smp_disabled, 0, "SMP has been disabled from the loader");
int smp_cpus = 1; /* how many cpu's running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
"Number of CPUs online");
int smp_topology = 0; /* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RDTUN, &smp_topology, 0,
"Topology override setting; 0 is default provided by hardware.");
long first_page = 0;
struct vmmeter vm_cnt;
@@ -139,6 +166,17 @@ int cpu_disable_c3_sleep = 0; /* Timer dies in C3. */
static void timevalfix(struct timeval *);
/* Extra care is taken with this sysctl because the data type is volatile */
static int
sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS)
{
int error, active;
active = smp_started;
error = SYSCTL_OUT(req, &active, sizeof(active));
return (error);
}
void
procinit()
{


@@ -79,7 +79,7 @@ struct ff_msg {
/* Result of msg processing */
int result;
/* Length of segment buffer. */
-uint16_t buf_len;
+size_t buf_len;
/* Address of segment buffer. */
char *buf_addr;


@@ -1,4 +1,4 @@
-SUBDIRS=compat sysctl ifconfig route top
+SUBDIRS=compat libutil libmemstat libxo sysctl ifconfig route top netstat
all:
for d in $(SUBDIRS); do ( cd $$d; $(MAKE) all ) ; done


@@ -110,6 +110,34 @@ Examples:
| 99.79%| 0.00%| 0.21%| 8147676|
```
# netstat
Usage:
```
netstat -P <f-stack proc_id> [-46AaLnRSTWx] [-f protocol_family | -p protocol]
netstat -P <f-stack proc_id> -i | -I interface [-46abdhnW] [-f address_family]
netstat -P <f-stack proc_id> -w wait [-I interface] [-46d] [-q howmany]
netstat -P <f-stack proc_id> -s [-46sz] [-f protocol_family | -p protocol]
netstat -P <f-stack proc_id> -i | -I interface -s [-46s]
[-f protocol_family | -p protocol]
netstat -P <f-stack proc_id> -B [-z] [-I interface]
netstat -P <f-stack proc_id> -r [-46AnW] [-F fibnum] [-f address_family]
netstat -P <f-stack proc_id> -rs [-s]
netstat -P <f-stack proc_id> -g [-46W] [-f address_family]
netstat -P <f-stack proc_id> -gs [-46s] [-f address_family]
netstat -P <f-stack proc_id> -Q
```
Unsupported commands or features:
```
-M
-N
-m
ipv6
netgraph
ipsec
```
For more details, see [Manual page](https://www.freebsd.org/cgi/man.cgi?netstat).
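For example (assuming the target F-Stack process has proc_id 0), the following print the IPv4 routing table numerically and the TCP protocol statistics, matching the usage lines above:
```
netstat -P 0 -rn -4
netstat -P 0 -s -p tcp
```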
# How to implement a custom tool for communicating with the F-Stack process


@@ -37,6 +37,10 @@
#define __dead __dead2
#endif
#ifndef __unused
#define __unused __attribute__((__unused__))
#endif
#ifndef nitems
#define nitems(x) (sizeof((x)) / sizeof((x)[0]))
#endif
@@ -45,6 +49,10 @@
#define __FBSDID(s) /* nothing */
#endif
#ifndef _PATH_ETC
#define _PATH_ETC "/etc"
#endif
void *reallocf(void *ptr, size_t size);
int feature_present(const char *feature);
@@ -57,4 +65,6 @@ size_t strlcpy(char * __restrict dst, const char * __restrict src,
long long strtonum(const char *numstr, long long minval,
long long maxval, const char **errstrp);
const char *getprogname(void);
#endif

tools/compat/getifmaddrs.c (new file, 195 lines added)

@@ -0,0 +1,195 @@
/*
* Copyright (c) 2003 Bruce M. Simpson.
* All rights reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <errno.h>
#include <ifaddrs.h>
#include <stdlib.h>
#include <string.h>
#define SALIGN (sizeof(long) - 1)
#define SA_RLEN(sa) ((sa)->sa_len ? (((sa)->sa_len + SALIGN) & ~SALIGN) : \
(SALIGN + 1))
#define MAX_SYSCTL_TRY 5
#define RTA_MASKS (RTA_GATEWAY | RTA_IFP | RTA_IFA)
int
getifmaddrs(struct ifmaddrs **pif)
{
int icnt = 1;
int dcnt = 0;
int ntry = 0;
size_t len;
size_t needed;
int mib[6];
int i;
char *buf;
char *data;
char *next;
char *p;
struct ifma_msghdr *ifmam;
struct ifmaddrs *ifa, *ift;
struct rt_msghdr *rtm;
struct sockaddr *sa;
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0; /* protocol */
mib[3] = 0; /* wildcard address family */
mib[4] = NET_RT_IFMALIST;
mib[5] = 0; /* no flags */
do {
if (sysctl(mib, 6, NULL, &needed, NULL, 0) < 0)
return (-1);
if ((buf = malloc(needed)) == NULL)
return (-1);
if (sysctl(mib, 6, buf, &needed, NULL, 0) < 0) {
if (errno != ENOMEM || ++ntry >= MAX_SYSCTL_TRY) {
free(buf);
return (-1);
}
free(buf);
buf = NULL;
}
} while (buf == NULL);
for (next = buf; next < buf + needed; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)(void *)next;
if (rtm->rtm_version != RTM_VERSION)
continue;
switch (rtm->rtm_type) {
case RTM_NEWMADDR:
ifmam = (struct ifma_msghdr *)(void *)rtm;
if ((ifmam->ifmam_addrs & RTA_IFA) == 0)
break;
icnt++;
p = (char *)(ifmam + 1);
for (i = 0; i < RTAX_MAX; i++) {
if ((RTA_MASKS & ifmam->ifmam_addrs &
(1 << i)) == 0)
continue;
sa = (struct sockaddr *)(void *)p;
len = SA_RLEN(sa);
dcnt += len;
p += len;
}
break;
}
}
data = malloc(sizeof(struct ifmaddrs) * icnt + dcnt);
if (data == NULL) {
free(buf);
return (-1);
}
ifa = (struct ifmaddrs *)(void *)data;
data += sizeof(struct ifmaddrs) * icnt;
memset(ifa, 0, sizeof(struct ifmaddrs) * icnt);
ift = ifa;
for (next = buf; next < buf + needed; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)(void *)next;
if (rtm->rtm_version != RTM_VERSION)
continue;
switch (rtm->rtm_type) {
case RTM_NEWMADDR:
ifmam = (struct ifma_msghdr *)(void *)rtm;
if ((ifmam->ifmam_addrs & RTA_IFA) == 0)
break;
p = (char *)(ifmam + 1);
for (i = 0; i < RTAX_MAX; i++) {
if ((RTA_MASKS & ifmam->ifmam_addrs &
(1 << i)) == 0)
continue;
sa = (struct sockaddr *)(void *)p;
len = SA_RLEN(sa);
switch (i) {
case RTAX_GATEWAY:
ift->ifma_lladdr =
(struct sockaddr *)(void *)data;
memcpy(data, p, len);
data += len;
break;
case RTAX_IFP:
ift->ifma_name =
(struct sockaddr *)(void *)data;
memcpy(data, p, len);
data += len;
break;
case RTAX_IFA:
ift->ifma_addr =
(struct sockaddr *)(void *)data;
memcpy(data, p, len);
data += len;
break;
default:
data += len;
break;
}
p += len;
}
ift->ifma_next = ift + 1;
ift = ift->ifma_next;
break;
}
}
free(buf);
if (ift > ifa) {
ift--;
ift->ifma_next = NULL;
*pif = ifa;
} else {
*pif = NULL;
free(ifa);
}
return (0);
}
void
freeifmaddrs(struct ifmaddrs *ifmp)
{
free(ifmp);
}
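The functions above mirror the standard getifmaddrs(3)/freeifmaddrs(3) interface. A minimal, hypothetical usage sketch that walks the returned list (the struct ifmaddrs members ifma_next and ifma_addr are the ones filled in above):
```
#include <sys/types.h>
#include <sys/socket.h>
#include <ifaddrs.h>
#include <stdio.h>

/* Hypothetical sketch: enumerate multicast group memberships. */
static void
dump_ifmaddrs(void)
{
    struct ifmaddrs *ifmap, *ifma;

    if (getifmaddrs(&ifmap) != 0)
        return;
    for (ifma = ifmap; ifma != NULL; ifma = ifma->ifma_next) {
        if (ifma->ifma_addr == NULL)
            continue;
        printf("group address family: %d\n", ifma->ifma_addr->sa_family);
    }
    freeifmaddrs(ifmap);
}
```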


@@ -46,6 +46,8 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
#include "libc_private.h"
#else
#include "compat.h"
#endif
int opterr = 1, /* if error message should be printed */
@@ -103,7 +105,7 @@ getopt(int nargc, char * const nargv[], const char *ostr)
#ifndef FSTACK
"%s: illegal option -- %c\n", _getprogname(),
#else
-"illegal option -- %c\n",
+"%s: illegal option -- %c\n", getprogname(),
#endif
optopt);
return (BADCH);
@@ -139,7 +141,8 @@ getopt(int nargc, char * const nargv[], const char *ostr)
"%s: option requires an argument -- %c\n",
_getprogname(), optopt);
#else
-"option requires an argument -- %c\n", optopt);
+"%s: option requires an argument -- %c\n",
+getprogname(), optopt);
#endif
return (BADCH);
}


@@ -0,0 +1,10 @@
#define _GNU_SOURCE
#include <errno.h>
#include "compat.h"
const char *
getprogname(void)
{
return program_invocation_name;
}


@@ -0,0 +1,327 @@
/*-
* Copyright (c) 2005-2009 Apple Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _BSM_AUDIT_H
#define _BSM_AUDIT_H
#include <sys/param.h>
#include <sys/types.h>
#define AUDIT_RECORD_MAGIC 0x828a0f1b
#define MAX_AUDIT_RECORDS 20
#define MAXAUDITDATA (0x8000 - 1)
#define MAX_AUDIT_RECORD_SIZE MAXAUDITDATA
#define MIN_AUDIT_FILE_SIZE (512 * 1024)
/*
* Minimum number of free blocks on the filesystem containing the audit
* log necessary to avoid a hard log rotation. DO NOT SET THIS VALUE TO 0
* as the kernel does an unsigned compare, plus we want to leave a few blocks
* free so userspace can terminate the log, etc.
*/
#define AUDIT_HARD_LIMIT_FREE_BLOCKS 4
/*
* Triggers for the audit daemon.
*/
#define AUDIT_TRIGGER_MIN 1
#define AUDIT_TRIGGER_LOW_SPACE 1 /* Below low watermark. */
#define AUDIT_TRIGGER_ROTATE_KERNEL 2 /* Kernel requests rotate. */
#define AUDIT_TRIGGER_READ_FILE 3 /* Re-read config file. */
#define AUDIT_TRIGGER_CLOSE_AND_DIE 4 /* Terminate audit. */
#define AUDIT_TRIGGER_NO_SPACE 5 /* Below min free space. */
#define AUDIT_TRIGGER_ROTATE_USER 6 /* User requests rotate. */
#define AUDIT_TRIGGER_INITIALIZE 7 /* User initialize of auditd. */
#define AUDIT_TRIGGER_EXPIRE_TRAILS 8 /* User expiration of trails. */
#define AUDIT_TRIGGER_MAX 8
/*
* The special device filename (FreeBSD).
*/
#define AUDITDEV_FILENAME "audit"
#define AUDIT_TRIGGER_FILE ("/dev/" AUDITDEV_FILENAME)
/*
* Pre-defined audit IDs
*/
#define AU_DEFAUDITID (uid_t)(-1)
#define AU_DEFAUDITSID 0
#define AU_ASSIGN_ASID -1
/*
* IPC types.
*/
#define AT_IPC_MSG ((u_char)1) /* Message IPC id. */
#define AT_IPC_SEM ((u_char)2) /* Semaphore IPC id. */
#define AT_IPC_SHM ((u_char)3) /* Shared mem IPC id. */
/*
* Audit conditions.
*/
#define AUC_UNSET 0
#define AUC_AUDITING 1
#define AUC_NOAUDIT 2
#define AUC_DISABLED -1
/*
* auditon(2) commands.
*/
#define A_OLDGETPOLICY 2
#define A_OLDSETPOLICY 3
#define A_GETKMASK 4
#define A_SETKMASK 5
#define A_OLDGETQCTRL 6
#define A_OLDSETQCTRL 7
#define A_GETCWD 8
#define A_GETCAR 9
#define A_GETSTAT 12
#define A_SETSTAT 13
#define A_SETUMASK 14
#define A_SETSMASK 15
#define A_OLDGETCOND 20
#define A_OLDSETCOND 21
#define A_GETCLASS 22
#define A_SETCLASS 23
#define A_GETPINFO 24
#define A_SETPMASK 25
#define A_SETFSIZE 26
#define A_GETFSIZE 27
#define A_GETPINFO_ADDR 28
#define A_GETKAUDIT 29
#define A_SETKAUDIT 30
#define A_SENDTRIGGER 31
#define A_GETSINFO_ADDR 32
#define A_GETPOLICY 33
#define A_SETPOLICY 34
#define A_GETQCTRL 35
#define A_SETQCTRL 36
#define A_GETCOND 37
#define A_SETCOND 38
/*
* Audit policy controls.
*/
#define AUDIT_CNT 0x0001
#define AUDIT_AHLT 0x0002
#define AUDIT_ARGV 0x0004
#define AUDIT_ARGE 0x0008
#define AUDIT_SEQ 0x0010
#define AUDIT_WINDATA 0x0020
#define AUDIT_USER 0x0040
#define AUDIT_GROUP 0x0080
#define AUDIT_TRAIL 0x0100
#define AUDIT_PATH 0x0200
#define AUDIT_SCNT 0x0400
#define AUDIT_PUBLIC 0x0800
#define AUDIT_ZONENAME 0x1000
#define AUDIT_PERZONE 0x2000
/*
* Default audit queue control parameters.
*/
#define AQ_HIWATER 100
#define AQ_MAXHIGH 10000
#define AQ_LOWATER 10
#define AQ_BUFSZ MAXAUDITDATA
#define AQ_MAXBUFSZ 1048576
/*
* Default minimum percentage free space on file system.
*/
#define AU_FS_MINFREE 20
/*
* Type definitions used indicating the length of variable length addresses
* in tokens containing addresses, such as header fields.
*/
#define AU_IPv4 4
#define AU_IPv6 16
__BEGIN_DECLS
typedef uid_t au_id_t;
typedef pid_t au_asid_t;
typedef u_int16_t au_event_t;
typedef u_int16_t au_emod_t;
typedef u_int32_t au_class_t;
typedef u_int64_t au_asflgs_t __attribute__ ((aligned (8)));
struct au_tid {
dev_t port;
u_int32_t machine;
};
typedef struct au_tid au_tid_t;
struct au_tid_addr {
dev_t at_port;
u_int32_t at_type;
u_int32_t at_addr[4];
};
typedef struct au_tid_addr au_tid_addr_t;
struct au_mask {
unsigned int am_success; /* Success bits. */
unsigned int am_failure; /* Failure bits. */
};
typedef struct au_mask au_mask_t;
struct auditinfo {
au_id_t ai_auid; /* Audit user ID. */
au_mask_t ai_mask; /* Audit masks. */
au_tid_t ai_termid; /* Terminal ID. */
au_asid_t ai_asid; /* Audit session ID. */
};
typedef struct auditinfo auditinfo_t;
struct auditinfo_addr {
au_id_t ai_auid; /* Audit user ID. */
au_mask_t ai_mask; /* Audit masks. */
au_tid_addr_t ai_termid; /* Terminal ID. */
au_asid_t ai_asid; /* Audit session ID. */
au_asflgs_t ai_flags; /* Audit session flags. */
};
typedef struct auditinfo_addr auditinfo_addr_t;
struct auditpinfo {
pid_t ap_pid; /* ID of target process. */
au_id_t ap_auid; /* Audit user ID. */
au_mask_t ap_mask; /* Audit masks. */
au_tid_t ap_termid; /* Terminal ID. */
au_asid_t ap_asid; /* Audit session ID. */
};
typedef struct auditpinfo auditpinfo_t;
struct auditpinfo_addr {
pid_t ap_pid; /* ID of target process. */
au_id_t ap_auid; /* Audit user ID. */
au_mask_t ap_mask; /* Audit masks. */
au_tid_addr_t ap_termid; /* Terminal ID. */
au_asid_t ap_asid; /* Audit session ID. */
au_asflgs_t ap_flags; /* Audit session flags. */
};
typedef struct auditpinfo_addr auditpinfo_addr_t;
struct au_session {
auditinfo_addr_t *as_aia_p; /* Ptr to full audit info. */
au_mask_t as_mask; /* Process Audit Masks. */
};
typedef struct au_session au_session_t;
/*
* Contents of token_t are opaque outside of libbsm.
*/
typedef struct au_token token_t;
/*
* Kernel audit queue control parameters:
* Default: Maximum:
* aq_hiwater: AQ_HIWATER (100) AQ_MAXHIGH (10000)
* aq_lowater: AQ_LOWATER (10) <aq_hiwater
* aq_bufsz: AQ_BUFSZ (32767) AQ_MAXBUFSZ (1048576)
* aq_delay: 20 20000 (not used)
*/
struct au_qctrl {
int aq_hiwater; /* Max # of audit recs in queue when */
/* threads with new ARs get blocked. */
int aq_lowater; /* # of audit recs in queue when */
/* blocked threads get unblocked. */
int aq_bufsz; /* Max size of audit record for audit(2). */
int aq_delay; /* Queue delay (not used). */
int aq_minfree; /* Minimum filesystem percent free space. */
};
typedef struct au_qctrl au_qctrl_t;
/*
* Structure for the audit statistics.
*/
struct audit_stat {
unsigned int as_version;
unsigned int as_numevent;
int as_generated;
int as_nonattrib;
int as_kernel;
int as_audit;
int as_auditctl;
int as_enqueue;
int as_written;
int as_wblocked;
int as_rblocked;
int as_dropped;
int as_totalsize;
unsigned int as_memused;
};
typedef struct audit_stat au_stat_t;
/*
* Structure for the audit file statistics.
*/
struct audit_fstat {
u_int64_t af_filesz;
u_int64_t af_currsz;
};
typedef struct audit_fstat au_fstat_t;
/*
* Audit to event class mapping.
*/
struct au_evclass_map {
au_event_t ec_number;
au_class_t ec_class;
};
typedef struct au_evclass_map au_evclass_map_t;
/*
* Audit system calls.
*/
#if !defined(_KERNEL) && !defined(KERNEL)
int audit(const void *, int);
int auditon(int, void *, int);
int auditctl(const char *);
int getauid(au_id_t *);
int setauid(const au_id_t *);
int getaudit(struct auditinfo *);
int setaudit(const struct auditinfo *);
int getaudit_addr(struct auditinfo_addr *, int);
int setaudit_addr(const struct auditinfo_addr *, int);
#ifdef __APPLE_API_PRIVATE
#include <mach/port.h>
mach_port_name_t audit_session_self(void);
au_asid_t audit_session_join(mach_port_name_t port);
#endif /* __APPLE_API_PRIVATE */
#endif /* defined(_KERNEL) || defined(KERNEL) */
__END_DECLS
#endif /* !_BSM_AUDIT_H */

File diff suppressed because it is too large.


@@ -0,0 +1,161 @@
/*-
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
* Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93
*
* $FreeBSD$
*/
#ifndef _NET_BPFDESC_H_
#define _NET_BPFDESC_H_
#include <sys/callout.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <net/if.h>
/*
* Descriptor associated with each open bpf file.
*/
struct zbuf;
struct bpf_d {
LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
/*
* Buffer slots: two memory buffers store the incoming packets.
* The model has three slots. Sbuf is always occupied.
* sbuf (store) - Receive interrupt puts packets here.
* hbuf (hold) - When sbuf is full, put buffer here and
* wakeup read (replace sbuf with fbuf).
* fbuf (free) - When read is done, put buffer here.
* On receiving, if sbuf is full and fbuf is 0, packet is dropped.
*/
caddr_t bd_sbuf; /* store slot */
caddr_t bd_hbuf; /* hold slot */
caddr_t bd_fbuf; /* free slot */
int bd_hbuf_in_use; /* don't rotate buffers */
int bd_slen; /* current length of store buffer */
int bd_hlen; /* current length of hold buffer */
int bd_bufsize; /* absolute length of buffers */
struct bpf_if * bd_bif; /* interface descriptor */
u_long bd_rtout; /* Read timeout in 'ticks' */
struct bpf_insn *bd_rfilter; /* read filter code */
struct bpf_insn *bd_wfilter; /* write filter code */
void *bd_bfilter; /* binary filter code */
u_int64_t bd_rcount; /* number of packets received */
u_int64_t bd_dcount; /* number of packets dropped */
u_char bd_promisc; /* true if listening promiscuously */
u_char bd_state; /* idle, waiting, or timed out */
u_char bd_immediate; /* true to return on packet arrival */
u_char bd_writer; /* non-zero if d is writer-only */
int bd_hdrcmplt; /* false to fill in src lladdr automatically */
int bd_direction; /* select packet direction */
int bd_tstamp; /* select time stamping function */
int bd_feedback; /* true to feed back sent packets */
int bd_async; /* non-zero if packet reception should generate signal */
int bd_sig; /* signal to send upon packet reception */
struct sigio * bd_sigio; /* information for async I/O */
struct selinfo bd_sel; /* bsd select info */
struct mtx bd_lock; /* per-descriptor lock */
struct callout bd_callout; /* for BPF timeouts with select */
struct label *bd_label; /* MAC label for descriptor */
u_int64_t bd_fcount; /* number of packets which matched filter */
pid_t bd_pid; /* PID which created descriptor */
int bd_locked; /* true if descriptor is locked */
u_int bd_bufmode; /* Current buffer mode. */
u_int64_t bd_wcount; /* number of packets written */
u_int64_t bd_wfcount; /* number of packets that matched write filter */
u_int64_t bd_wdcount; /* number of packets dropped during a write */
u_int64_t bd_zcopy; /* number of zero copy operations */
u_char bd_compat32; /* 32-bit stream on LP64 system */
};
/* Values for bd_state */
#define BPF_IDLE 0 /* no select in progress */
#define BPF_WAITING 1 /* waiting for read timeout in select */
#define BPF_TIMED_OUT 2 /* read timeout has expired in select */
#define BPFD_LOCK(bd) mtx_lock(&(bd)->bd_lock)
#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_lock)
#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_lock, MA_OWNED)
#define BPF_PID_REFRESH(bd, td) (bd)->bd_pid = (td)->td_proc->p_pid
#define BPF_PID_REFRESH_CUR(bd) (bd)->bd_pid = curthread->td_proc->p_pid
#define BPF_LOCK() mtx_lock(&bpf_mtx)
#define BPF_UNLOCK() mtx_unlock(&bpf_mtx)
#define BPF_LOCK_ASSERT() mtx_assert(&bpf_mtx, MA_OWNED)
/*
* External representation of the bpf descriptor
*/
struct xbpf_d {
u_int bd_structsize; /* Size of this structure. */
u_char bd_promisc;
u_char bd_immediate;
u_char __bd_pad[6];
int bd_hdrcmplt;
int bd_direction;
int bd_feedback;
int bd_async;
u_int64_t bd_rcount;
u_int64_t bd_dcount;
u_int64_t bd_fcount;
int bd_sig;
int bd_slen;
int bd_hlen;
int bd_bufsize;
pid_t bd_pid;
char bd_ifname[IFNAMSIZ];
int bd_locked;
u_int64_t bd_wcount;
u_int64_t bd_wfcount;
u_int64_t bd_wdcount;
u_int64_t bd_zcopy;
int bd_bufmode;
/*
* Allocate 4 64 bit unsigned integers for future expansion so we do
* not have to worry about breaking the ABI.
*/
u_int64_t bd_spare[4];
};
#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
#define BPFIF_FLAG_DYING 1 /* Reject new bpf consumers */
#endif


@@ -0,0 +1,56 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2008-2010, BitGravity Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of the BitGravity Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef _NET_FLOWTABLE_H_
#define _NET_FLOWTABLE_H_
struct flowtable_stat {
uint64_t ft_collisions;
uint64_t ft_misses;
uint64_t ft_free_checks;
uint64_t ft_frees;
uint64_t ft_hits;
uint64_t ft_lookups;
uint64_t ft_fail_lle_invalid;
uint64_t ft_inserts;
};
#ifdef _KERNEL
/*
* Given a flow table, look up the L3 and L2 information
* and return it in the route.
*/
int flowtable_lookup(sa_family_t, struct mbuf *, struct route *);
void flowtable_route_flush(sa_family_t, struct rtentry *);
#endif /* _KERNEL */
#endif /* !_NET_FLOWTABLE_H_ */


@@ -0,0 +1,249 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
* Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
* to Juniper Networks, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NET_NETISR_H_
#define _NET_NETISR_H_
/*
* The netisr (network interrupt service routine) provides a deferred
* execution environment in which (generally inbound) network processing can
* take place. Protocols register handlers which will be executed directly,
* or via deferred dispatch, depending on the circumstances.
*
* Historically, this was implemented by the BSD software ISR facility; it is
* now implemented via a software ithread (SWI).
*/
/*
* Protocol numbers, which are encoded in monitoring applications and kernel
* modules. Internally, these are used in bit shift operations so must have
* a value 0 < proto < 32; we currently further limit at compile-time to 16
* for array-sizing purposes.
*/
#define NETISR_IP 1
#define NETISR_IGMP 2 /* IGMPv3 output queue */
#define NETISR_ROUTE 3 /* routing socket */
#define NETISR_ARP 4 /* same as AF_LINK */
#define NETISR_ETHER 5 /* ethernet input */
#define NETISR_IPV6 6
#define NETISR_NATM 7
#define NETISR_EPAIR 8 /* if_epair(4) */
#define NETISR_IP_DIRECT 9 /* direct-dispatch IPv4 */
#define NETISR_IPV6_DIRECT 10 /* direct-dispatch IPv6 */
/*
* Protocol ordering and affinity policy constants. See the detailed
* discussion of policies later in the file.
*/
#define NETISR_POLICY_SOURCE 1 /* Maintain source ordering. */
#define NETISR_POLICY_FLOW 2 /* Maintain flow ordering. */
#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */
/*
* Protocol dispatch policy constants; selects whether and when direct
* dispatch is permitted.
*/
#define NETISR_DISPATCH_DEFAULT 0 /* Use global default. */
#define NETISR_DISPATCH_DEFERRED 1 /* Always defer dispatch. */
#define NETISR_DISPATCH_HYBRID 2 /* Allow hybrid dispatch. */
#define NETISR_DISPATCH_DIRECT 3 /* Always direct dispatch. */
/*
* Monitoring data structures, exported by sysctl(2).
*
* Three sysctls are defined. First, a per-protocol structure exported by
* net.isr.proto.
*/
#define NETISR_NAMEMAXLEN 32
struct sysctl_netisr_proto {
u_int snp_version; /* Length of struct. */
char snp_name[NETISR_NAMEMAXLEN]; /* nh_name */
u_int snp_proto; /* nh_proto */
u_int snp_qlimit; /* nh_qlimit */
u_int snp_policy; /* nh_policy */
u_int snp_flags; /* Various flags. */
u_int snp_dispatch; /* Dispatch policy. */
u_int _snp_ispare[6];
};
/*
* Flags for sysctl_netisr_proto.snp_flags.
*/
#define NETISR_SNP_FLAGS_M2FLOW 0x00000001 /* nh_m2flow */
#define NETISR_SNP_FLAGS_M2CPUID 0x00000002 /* nh_m2cpuid */
#define NETISR_SNP_FLAGS_DRAINEDCPU 0x00000004 /* nh_drainedcpu */
/*
* Next, a structure per-workstream, with per-protocol data, exported as
* net.isr.workstream.
*/
struct sysctl_netisr_workstream {
u_int snws_version; /* Length of struct. */
u_int snws_flags; /* Various flags. */
u_int snws_wsid; /* Workstream ID. */
u_int snws_cpu; /* nws_cpu */
u_int _snws_ispare[12];
};
/*
* Flags for sysctl_netisr_workstream.snws_flags
*/
#define NETISR_SNWS_FLAGS_INTR 0x00000001 /* nws_intr_event */
/*
* Finally, a per-workstream-per-protocol structure, exported as
* net.isr.work.
*/
struct sysctl_netisr_work {
u_int snw_version; /* Length of struct. */
u_int snw_wsid; /* Workstream ID. */
u_int snw_proto; /* Protocol number. */
u_int snw_len; /* nw_len */
u_int snw_watermark; /* nw_watermark */
u_int _snw_ispare[3];
uint64_t snw_dispatched; /* nw_dispatched */
uint64_t snw_hybrid_dispatched; /* nw_hybrid_dispatched */
uint64_t snw_qdrops; /* nw_qdrops */
uint64_t snw_queued; /* nw_queued */
uint64_t snw_handled; /* nw_handled */
uint64_t _snw_llspare[7];
};
#ifdef _KERNEL
/*-
* Protocols express ordering constraints and affinity preferences by
* implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by
* netisr to determine which per-CPU workstream to assign mbufs to.
*
* The following policies may be used by protocols:
*
* NETISR_POLICY_SOURCE - netisr should maintain source ordering without
* advice from the protocol. netisr will ignore any
* flow IDs present on the mbuf for the purposes of
* work placement.
*
* NETISR_POLICY_FLOW - netisr should maintain flow ordering as defined by
* the mbuf header flow ID field. If the protocol
* implements nh_m2flow, then netisr will query the
* protocol in the event that the mbuf doesn't have a
* flow ID, falling back on source ordering.
*
* NETISR_POLICY_CPU - netisr will delegate all work placement decisions to
* the protocol, querying nh_m2cpuid for each packet.
*
* Protocols might make decisions about work placement based on an existing
* calculated flow ID on the mbuf, such as one provided in hardware, the
* receive interface pointed to by the mbuf (if any), the optional source
* identifier passed at some dispatch points, or even parse packet headers to
* calculate a flow. Both protocol handlers may return a new mbuf pointer
* for the chain, or NULL if the packet proves invalid or m_pullup() fails.
*
* XXXRW: If we eventually support dynamic reconfiguration, there should be
* protocol handlers to notify them of CPU configuration changes so that they
* can rebalance work.
*/
struct mbuf;
typedef void netisr_handler_t(struct mbuf *m);
typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source,
u_int *cpuid);
typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source);
typedef void netisr_drainedcpu_t(u_int cpuid);
#define NETISR_CPUID_NONE ((u_int)-1) /* No affinity returned. */
/*
* Data structure describing a protocol handler.
*/
struct netisr_handler {
const char *nh_name; /* Character string protocol name. */
netisr_handler_t *nh_handler; /* Protocol handler. */
netisr_m2flow_t *nh_m2flow; /* Query flow for untagged packet. */
netisr_m2cpuid_t *nh_m2cpuid; /* Query CPU to process mbuf on. */
netisr_drainedcpu_t *nh_drainedcpu; /* Callback when drained a queue. */
u_int nh_proto; /* Integer protocol ID. */
u_int nh_qlimit; /* Maximum per-CPU queue depth. */
u_int nh_policy; /* Work placement policy. */
u_int nh_dispatch; /* Dispatch policy. */
u_int nh_ispare[4]; /* For future use. */
void *nh_pspare[4]; /* For future use. */
};
/*
* Register, unregister, and other netisr handler management functions.
*/
void netisr_clearqdrops(const struct netisr_handler *nhp);
void netisr_getqdrops(const struct netisr_handler *nhp,
u_int64_t *qdropsp);
void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp);
void netisr_register(const struct netisr_handler *nhp);
int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit);
void netisr_unregister(const struct netisr_handler *nhp);
#ifdef VIMAGE
void netisr_register_vnet(const struct netisr_handler *nhp);
void netisr_unregister_vnet(const struct netisr_handler *nhp);
#endif
/*
* Process a packet destined for a protocol, and attempt direct dispatch.
* Supplemental source ordering information can be passed using the _src
* variant.
*/
int netisr_dispatch(u_int proto, struct mbuf *m);
int netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m);
int netisr_queue(u_int proto, struct mbuf *m);
int netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m);
/*
* Provide a default implementation of "map an ID to a CPU ID".
*/
u_int netisr_default_flow2cpu(u_int flowid);
/*
* Utility routines to return the number of CPUs participating in netisr, and
* to return a mapping from a number to a CPU ID that can be used with the
* scheduler.
*/
u_int netisr_get_cpucount(void);
u_int netisr_get_cpuid(u_int cpunumber);
/*
* Interfaces between DEVICE_POLLING and netisr.
*/
void netisr_sched_poll(void);
void netisr_poll(void);
void netisr_pollmore(void);
#endif /* !_KERNEL */
#endif /* !_NET_NETISR_H_ */
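The handler structure and registration functions above suggest the usual registration pattern. A hedged sketch follows; the "myproto" names are hypothetical and an existing protocol number is reused purely for illustration:
```
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <net/netisr.h>

/* Hypothetical protocol input routine: consume (here, simply free) the mbuf. */
static void
myproto_input(struct mbuf *m)
{
    m_freem(m);
}

static struct netisr_handler myproto_nh = {
    .nh_name = "myproto",
    .nh_handler = myproto_input,
    .nh_proto = NETISR_IP,          /* illustration only; a real protocol needs its own ID */
    .nh_qlimit = 256,
    .nh_policy = NETISR_POLICY_FLOW,
    .nh_dispatch = NETISR_DISPATCH_DEFAULT,
};

static void
myproto_init(void)
{
    netisr_register(&myproto_nh);
}
```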


@@ -0,0 +1,128 @@
/*-
* Copyright (c) 2007-2009 Robert N. M. Watson
* Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* This software was developed by Robert N. M. Watson under contract
* to Juniper Networks, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NET_NETISR_INTERNAL_H_
#define _NET_NETISR_INTERNAL_H_
#ifndef _WANT_NETISR_INTERNAL
#error "no user-serviceable parts inside"
#endif
/*
* These definitions are private to the netisr implementation, but provided
* here for use by post-mortem crashdump analysis tools. They should not be
* used in any other context as they can and will change. Public definitions
* may be found in netisr.h.
*/
#ifndef _KERNEL
typedef void *netisr_handler_t;
typedef void *netisr_m2flow_t;
typedef void *netisr_m2cpuid_t;
typedef void *netisr_drainedcpu_t;
#endif
/*
* Each protocol is described by a struct netisr_proto, which holds all
* global per-protocol information. This data structure is set up by
* netisr_register(), and derived from the public struct netisr_handler.
*/
struct netisr_proto {
const char *np_name; /* Character string protocol name. */
netisr_handler_t *np_handler; /* Protocol handler. */
netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */
netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */
netisr_drainedcpu_t *np_drainedcpu; /* Callback when drained a queue. */
u_int np_qlimit; /* Maximum per-CPU queue depth. */
u_int np_policy; /* Work placement policy. */
u_int np_dispatch; /* Work dispatch policy. */
};
#define NETISR_MAXPROT 16 /* Compile-time limit. */
/*
* Protocol-specific work for each workstream is described by struct
* netisr_work. Each work descriptor consists of an mbuf queue and
* statistics.
*/
struct netisr_work {
/*
* Packet queue, linked by m_nextpkt.
*/
struct mbuf *nw_head;
struct mbuf *nw_tail;
u_int nw_len;
u_int nw_qlimit;
u_int nw_watermark;
/*
* Statistics -- written unlocked, but mostly from curcpu.
*/
u_int64_t nw_dispatched; /* Number of direct dispatches. */
u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */
u_int64_t nw_qdrops; /* "" drops. */
u_int64_t nw_queued; /* "" enqueues. */
u_int64_t nw_handled; /* "" handled in worker. */
};
/*
* Workstreams hold a queue of ordered work across each protocol, and are
* described by netisr_workstream. Each workstream is associated with a
* worker thread, which in turn is pinned to a CPU. Work associated with a
* workstream can be processed in other threads during direct dispatch;
* concurrent processing is prevented by the NWS_RUNNING flag, which
* indicates that a thread is already processing the work queue. It is
* important to prevent a directly dispatched packet from "skipping ahead" of
* work already in the workstream queue.
*/
struct netisr_workstream {
struct intr_event *nws_intr_event; /* Handler for stream. */
void *nws_swi_cookie; /* swi(9) cookie for stream. */
struct mtx nws_mtx; /* Synchronize work. */
u_int nws_cpu; /* CPU pinning. */
u_int nws_flags; /* Wakeup flags. */
u_int nws_pendingbits; /* Scheduled protocols. */
/*
* Each protocol has per-workstream data.
*/
struct netisr_work nws_work[NETISR_MAXPROT];
} __attribute__((__aligned__(64)));
/*
* Per-workstream flags.
*/
#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */
#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */
#define NWS_SCHEDULED 0x00000004 /* Signal issued. */
#endif /* !_NET_NETISR_INTERNAL_H_ */


@@ -227,4 +227,17 @@ int getaddrinfo(const char *, const char *,
void freeaddrinfo(struct addrinfo *);
const char *gai_strerror(int ecode);
struct servent *getservbyport(int port, const char *proto);
struct protoent *getprotoent(void);
struct protoent *getprotobyname(const char *name);
struct protoent *getprotobynumber(int proto);
void setprotoent(int stayopen);
void endprotoent(void);
int getnameinfo(const struct sockaddr *sa, socklen_t salen,
char *host, size_t hostlen, char *serv, size_t servlen, int flags);
struct netent *getnetbyaddr(uint32_t net, int type);
#endif /* !_NETDB_H_ */
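These additions follow the standard <netdb.h> prototypes. A small, hypothetical sketch exercising getprotobyname() and getnameinfo() as declared above (the loopback address and port 80 are just example values):
```
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical sketch: protocol lookup and reverse address mapping. */
static void
netdb_demo(void)
{
    struct protoent *pe;
    struct sockaddr_in sin;
    char host[128], serv[32];

    pe = getprotobyname("tcp");
    if (pe != NULL)
        printf("tcp is IP protocol %d\n", pe->p_proto);

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_port = htons(80);
    inet_pton(AF_INET, "127.0.0.1", &sin.sin_addr);
    if (getnameinfo((struct sockaddr *)&sin, sizeof(sin),
        host, sizeof(host), serv, sizeof(serv), 0) == 0)
        printf("%s:%s\n", host, serv);
}
```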


@@ -0,0 +1,69 @@
/*
* ng_socket.h
*/
/*-
* Copyright (c) 1996-1999 Whistle Communications, Inc.
* All rights reserved.
*
* Subject to the following obligations and disclaimer of warranty, use and
* redistribution of this software, in source or object code forms, with or
* without modifications are expressly permitted by Whistle Communications;
* provided, however, that:
* 1. Any and all reproductions of the source or object code must include the
* copyright notice above and the following disclaimer of warranties; and
* 2. No rights are granted, in any manner or form, to use Whistle
* Communications, Inc. trademarks, including the mark "WHISTLE
* COMMUNICATIONS" on advertising, endorsements, or otherwise except as
* such appears in the above copyright notice or in the software.
*
* THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
* TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
* REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
* INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
* WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
* REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
* SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
* IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
* RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
* WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
* Author: Julian Elischer <julian@freebsd.org>
*
* $FreeBSD$
* $Whistle: ng_socket.h,v 1.5 1999/01/20 00:22:14 archie Exp $
*/
#ifndef _NETGRAPH_NG_SOCKET_H_
#define _NETGRAPH_NG_SOCKET_H_
/* Netgraph node type name and cookie */
#define NG_SOCKET_NODE_TYPE "socket"
#define NGM_SOCKET_COOKIE 851601233
/* Netgraph socket(2) constants */
#define NG_DATA 1
#define NG_CONTROL 2
/* Commands */
enum {
NGM_SOCK_CMD_NOLINGER = 1, /* close the socket with last hook */
NGM_SOCK_CMD_LINGER /* Keep socket even if 0 hooks */
};
/* Netgraph version of struct sockaddr */
struct sockaddr_ng {
unsigned char sg_len; /* total length */
sa_family_t sg_family; /* address family */
char sg_data[14]; /* actually longer; address value */
};
#endif /* _NETGRAPH_NG_SOCKET_H_ */


@@ -0,0 +1,102 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_ICMP_VAR_H_
#define _NETINET_ICMP_VAR_H_
/*
* Variables related to this implementation
* of the internet control message protocol.
*/
struct icmpstat {
/* statistics related to icmp packets generated */
u_long icps_error; /* # of calls to icmp_error */
u_long icps_oldshort; /* no error 'cuz old ip too short */
u_long icps_oldicmp; /* no error 'cuz old was icmp */
u_long icps_outhist[ICMP_MAXTYPE + 1];
/* statistics related to input messages processed */
u_long icps_badcode; /* icmp_code out of range */
u_long icps_tooshort; /* packet < ICMP_MINLEN */
u_long icps_checksum; /* bad checksum */
u_long icps_badlen; /* calculated bound mismatch */
u_long icps_reflect; /* number of responses */
u_long icps_inhist[ICMP_MAXTYPE + 1];
u_long icps_bmcastecho; /* b/mcast echo requests dropped */
u_long icps_bmcasttstamp; /* b/mcast tstamp requests dropped */
u_long icps_badaddr; /* bad return address */
u_long icps_noroute; /* no route back */
};
#ifdef _KERNEL
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct icmpstat, icmpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
#define ICMPSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct icmpstat, icmpstat, name, (val))
#define ICMPSTAT_INC(name) ICMPSTAT_ADD(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_icmpstat_inc(int statnum);
#define KMOD_ICMPSTAT_INC(name) \
kmod_icmpstat_inc(offsetof(struct icmpstat, name) / sizeof(uint64_t))
#endif
/*
* Identifiers for ICMP sysctl nodes
*/
#define ICMPCTL_MASKREPL 1 /* allow replies to netmask requests */
#define ICMPCTL_STATS 2 /* statistics (read-only) */
#define ICMPCTL_ICMPLIM 3
#ifdef _KERNEL
SYSCTL_DECL(_net_inet_icmp);
extern int badport_bandlim(int);
#define BANDLIM_UNLIMITED -1
#define BANDLIM_ICMP_UNREACH 0
#define BANDLIM_ICMP_ECHO 1
#define BANDLIM_ICMP_TSTAMP 2
#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
#define BANDLIM_ICMP6_UNREACH 5
#define BANDLIM_SCTP_OOTB 6
#define BANDLIM_MAX 6
#endif
#endif


@@ -0,0 +1,148 @@
/*-
* Copyright (c) 1988 Stephen Deering.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Stephen Deering of Stanford University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)igmp.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IGMP_H_
#define _NETINET_IGMP_H_
/*
* Internet Group Management Protocol (IGMP) definitions.
*
* Written by Steve Deering, Stanford, May 1988.
*
* MULTICAST Revision: 3.5.1.2
*/
/* Minimum length of any IGMP protocol message. */
#define IGMP_MINLEN 8
/*
* IGMPv1/v2 query and host report format.
*/
struct igmp {
u_char igmp_type; /* version & type of IGMP message */
u_char igmp_code; /* subtype for routing msgs */
u_short igmp_cksum; /* IP-style checksum */
struct in_addr igmp_group; /* group address being reported */
}; /* (zero for queries) */
/*
* IGMP v3 query format.
*/
struct igmpv3 {
u_char igmp_type; /* version & type of IGMP message */
u_char igmp_code; /* subtype for routing msgs */
u_short igmp_cksum; /* IP-style checksum */
struct in_addr igmp_group; /* group address being reported */
/* (zero for queries) */
u_char igmp_misc; /* reserved/suppress/robustness */
u_char igmp_qqi; /* querier's query interval */
u_short igmp_numsrc; /* number of sources */
/*struct in_addr igmp_sources[1];*/ /* source addresses */
};
#define IGMP_V3_QUERY_MINLEN 12
#define IGMP_EXP(x) (((x) >> 4) & 0x07)
#define IGMP_MANT(x) ((x) & 0x0f)
#define IGMP_QRESV(x) (((x) >> 4) & 0x0f)
#define IGMP_SFLAG(x) (((x) >> 3) & 0x01)
#define IGMP_QRV(x) ((x) & 0x07)
struct igmp_grouprec {
u_char ig_type; /* record type */
u_char ig_datalen; /* length of auxiliary data */
u_short ig_numsrc; /* number of sources */
struct in_addr ig_group; /* group address being reported */
/*struct in_addr ig_sources[1];*/ /* source addresses */
};
#define IGMP_GRPREC_HDRLEN 8
/*
* IGMPv3 host membership report header.
*/
struct igmp_report {
u_char ir_type; /* IGMP_v3_HOST_MEMBERSHIP_REPORT */
u_char ir_rsv1; /* must be zero */
u_short ir_cksum; /* checksum */
u_short ir_rsv2; /* must be zero */
u_short ir_numgrps; /* number of group records */
/*struct igmp_grouprec ir_groups[1];*/ /* group records */
};
#define IGMP_V3_REPORT_MINLEN 8
#define IGMP_V3_REPORT_MAXRECS 65535
/*
* Message types, including version number.
*/
#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* membership query */
#define IGMP_v1_HOST_MEMBERSHIP_REPORT 0x12 /* Ver. 1 membership report */
#define IGMP_DVMRP 0x13 /* DVMRP routing message */
#define IGMP_PIM 0x14 /* PIMv1 message (historic) */
#define IGMP_v2_HOST_MEMBERSHIP_REPORT 0x16 /* Ver. 2 membership report */
#define IGMP_HOST_LEAVE_MESSAGE 0x17 /* Leave-group message */
#define IGMP_MTRACE_REPLY 0x1e /* mtrace(8) reply */
#define IGMP_MTRACE_QUERY 0x1f /* mtrace(8) probe */
#define IGMP_v3_HOST_MEMBERSHIP_REPORT 0x22 /* Ver. 3 membership report */
/*
* IGMPv3 report modes.
*/
#define IGMP_DO_NOTHING 0 /* don't send a record */
#define IGMP_MODE_IS_INCLUDE 1 /* MODE_IN */
#define IGMP_MODE_IS_EXCLUDE 2 /* MODE_EX */
#define IGMP_CHANGE_TO_INCLUDE_MODE 3 /* TO_IN */
#define IGMP_CHANGE_TO_EXCLUDE_MODE 4 /* TO_EX */
#define IGMP_ALLOW_NEW_SOURCES 5 /* ALLOW_NEW */
#define IGMP_BLOCK_OLD_SOURCES 6 /* BLOCK_OLD */
/*
* IGMPv3 query types.
*/
#define IGMP_V3_GENERAL_QUERY 1
#define IGMP_V3_GROUP_QUERY 2
#define IGMP_V3_GROUP_SOURCE_QUERY 3
/*
* Maximum report interval for IGMP v1/v2 host membership reports [RFC 1112]
*/
#define IGMP_V1V2_MAX_RI 10
#define IGMP_MAX_HOST_REPORT_DELAY IGMP_V1V2_MAX_RI
/*
* IGMP_TIMER_SCALE denotes that the igmp code field specifies
* time in tenths of a second.
*/
#define IGMP_TIMER_SCALE 10
#endif /* _NETINET_IGMP_H_ */


@@ -0,0 +1,231 @@
/*-
* Copyright (c) 1988 Stephen Deering.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Stephen Deering of Stanford University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
* $FreeBSD$
*/
#ifndef _NETINET_IGMP_VAR_H_
#define _NETINET_IGMP_VAR_H_
/*
* Internet Group Management Protocol (IGMP),
* implementation-specific definitions.
*
* Written by Steve Deering, Stanford, May 1988.
*
* MULTICAST Revision: 3.5.1.3
*/
/*
* IGMPv3 protocol statistics.
*/
struct igmpstat {
/*
* Structure header (to insulate ABI changes).
*/
uint32_t igps_version; /* version of this structure */
uint32_t igps_len; /* length of this structure */
/*
* Message statistics.
*/
uint64_t igps_rcv_total; /* total IGMP messages received */
uint64_t igps_rcv_tooshort; /* received with too few bytes */
uint64_t igps_rcv_badttl; /* received with ttl other than 1 */
uint64_t igps_rcv_badsum; /* received with bad checksum */
/*
* Query statistics.
*/
uint64_t igps_rcv_v1v2_queries; /* received IGMPv1/IGMPv2 queries */
uint64_t igps_rcv_v3_queries; /* received IGMPv3 queries */
uint64_t igps_rcv_badqueries; /* received invalid queries */
uint64_t igps_rcv_gen_queries; /* received general queries */
uint64_t igps_rcv_group_queries;/* received group queries */
uint64_t igps_rcv_gsr_queries; /* received group-source queries */
uint64_t igps_drop_gsr_queries; /* dropped group-source queries */
/*
* Report statistics.
*/
uint64_t igps_rcv_reports; /* received membership reports */
uint64_t igps_rcv_badreports; /* received invalid reports */
uint64_t igps_rcv_ourreports; /* received reports for our groups */
uint64_t igps_rcv_nora; /* received w/o Router Alert option */
uint64_t igps_snd_reports; /* sent membership reports */
/*
* Padding for future additions.
*/
uint64_t __igps_pad[4];
};
#define IGPS_VERSION_3 3 /* as of FreeBSD 8.x */
#define IGPS_VERSION3_LEN 168
#ifdef CTASSERT
CTASSERT(sizeof(struct igmpstat) == IGPS_VERSION3_LEN);
#endif
/*
* Identifiers for IGMP sysctl nodes
*/
#define IGMPCTL_STATS 1 /* statistics (read-only) */
#define IGMP_RANDOM_DELAY(X) (random() % (X) + 1)
#define IGMP_MAX_STATE_CHANGES 24 /* Max pending changes per group */
/*
* IGMP per-group states.
*/
#define IGMP_NOT_MEMBER 0 /* Can garbage collect in_multi */
#define IGMP_SILENT_MEMBER 1 /* Do not perform IGMP for group */
#define IGMP_REPORTING_MEMBER 2 /* IGMPv1/2/3 we are reporter */
#define IGMP_IDLE_MEMBER 3 /* IGMPv1/2 we reported last */
#define IGMP_LAZY_MEMBER 4 /* IGMPv1/2 other member reporting */
#define IGMP_SLEEPING_MEMBER 5 /* IGMPv1/2 start query response */
#define IGMP_AWAKENING_MEMBER 6 /* IGMPv1/2 group timer will start */
#define IGMP_G_QUERY_PENDING_MEMBER 7 /* IGMPv3 group query pending */
#define IGMP_SG_QUERY_PENDING_MEMBER 8 /* IGMPv3 source query pending */
#define IGMP_LEAVING_MEMBER 9 /* IGMPv3 dying gasp (pending last */
/* retransmission of INCLUDE {}) */
/*
* IGMP version tag.
*/
#define IGMP_VERSION_NONE 0 /* Invalid */
#define IGMP_VERSION_1 1
#define IGMP_VERSION_2 2
#define IGMP_VERSION_3 3 /* Default */
/*
* IGMPv3 protocol control variables.
*/
#define IGMP_RV_INIT 2 /* Robustness Variable */
#define IGMP_RV_MIN 1
#define IGMP_RV_MAX 7
#define IGMP_QI_INIT 125 /* Query Interval (s) */
#define IGMP_QI_MIN 1
#define IGMP_QI_MAX 255
#define IGMP_QRI_INIT 10 /* Query Response Interval (s) */
#define IGMP_QRI_MIN 1
#define IGMP_QRI_MAX 255
#define IGMP_URI_INIT 3 /* Unsolicited Report Interval (s) */
#define IGMP_URI_MIN 0
#define IGMP_URI_MAX 10
#define IGMP_MAX_G_GS_PACKETS 8 /* # of packets to answer G/GS */
#define IGMP_MAX_STATE_CHANGE_PACKETS 8 /* # of packets per state change */
#define IGMP_MAX_RESPONSE_PACKETS 16 /* # of packets for general query */
#define IGMP_MAX_RESPONSE_BURST 4 /* # of responses to send at once */
#define IGMP_RESPONSE_BURST_INTERVAL (PR_FASTHZ / 2) /* 500ms */
/*
* IGMP-specific mbuf flags.
*/
#define M_IGMPV2 M_PROTO1 /* Packet is IGMPv2 */
#define M_IGMPV3_HDR M_PROTO2 /* Packet has IGMPv3 headers */
#define M_GROUPREC M_PROTO3 /* mbuf chain is a group record */
#define M_IGMP_LOOP M_PROTO4 /* transmit on loif, not real ifp */
/*
* Default amount of leading space for IGMPv3 to allocate at the
* beginning of its mbuf packet chains, to avoid fragmentation and
* unnecessary allocation of leading mbufs.
*/
#define RAOPT_LEN 4 /* Length of IP Router Alert option */
#define IGMP_LEADINGSPACE \
(sizeof(struct ip) + RAOPT_LEN + sizeof(struct igmp_report))
/*
* Structure returned by net.inet.igmp.ifinfo sysctl.
*/
struct igmp_ifinfo {
uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
uint32_t igi_flags; /* IGMP per-interface flags */
#define IGIF_SILENT 0x00000001 /* Do not use IGMP on this ifp */
#define IGIF_LOOPBACK 0x00000002 /* Send IGMP reports to loopback */
uint32_t igi_rv; /* IGMPv3 Robustness Variable */
uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
};
#ifdef _KERNEL
#define IGMPSTAT_ADD(name, val) V_igmpstat.name += (val)
#define IGMPSTAT_INC(name) IGMPSTAT_ADD(name, 1)
/*
* Subsystem lock macros.
* The IGMP lock is only taken with IGMP. Currently it is system-wide.
* VIMAGE: The lock could be pushed to per-VIMAGE granularity in future.
*/
#define IGMP_LOCK_INIT() mtx_init(&igmp_mtx, "igmp_mtx", NULL, MTX_DEF)
#define IGMP_LOCK_DESTROY() mtx_destroy(&igmp_mtx)
#define IGMP_LOCK() mtx_lock(&igmp_mtx)
#define IGMP_LOCK_ASSERT() mtx_assert(&igmp_mtx, MA_OWNED)
#define IGMP_UNLOCK() mtx_unlock(&igmp_mtx)
#define IGMP_UNLOCK_ASSERT() mtx_assert(&igmp_mtx, MA_NOTOWNED)
/*
* Per-interface IGMP router version information.
*/
struct igmp_ifsoftc {
LIST_ENTRY(igmp_ifsoftc) igi_link;
struct ifnet *igi_ifp; /* pointer back to interface */
uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */
uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */
uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */
uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/
uint32_t igi_flags; /* IGMP per-interface flags */
uint32_t igi_rv; /* IGMPv3 Robustness Variable */
uint32_t igi_qi; /* IGMPv3 Query Interval (s) */
uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */
uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */
SLIST_HEAD(,in_multi) igi_relinmhead; /* released groups */
struct mbufq igi_gq; /* general query responses queue */
};
int igmp_change_state(struct in_multi *);
void igmp_fasttimo(void);
struct igmp_ifsoftc *
igmp_domifattach(struct ifnet *);
void igmp_domifdetach(struct ifnet *);
void igmp_ifdetach(struct ifnet *);
int igmp_input(struct mbuf **, int *, int);
void igmp_slowtimo(void);
SYSCTL_DECL(_net_inet_igmp);
#endif /* _KERNEL */
#endif
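
The igps_version and igps_len fields at the top of struct igmpstat exist so that userland readers can sanity-check the ABI before interpreting the counters. As a hedged illustration (not part of this commit), a netstat-style consumer on stock FreeBSD could fetch the structure roughly as below; F-Stack's ported netstat issues the same request through its sysctl proxy instead of the kernel.

```
/*
 * Minimal sketch, assuming the stock net.inet.igmp.stats sysctl and
 * plain libc sysctlbyname(); not part of this commit.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <stdint.h>
#include <stdio.h>

int
print_igmp_stats(void)
{
	struct igmpstat igps;
	size_t len = sizeof(igps);

	if (sysctlbyname("net.inet.igmp.stats", &igps, &len, NULL, 0) == -1)
		return (-1);
	/* Check the ABI header before trusting the counters. */
	if (igps.igps_version != IGPS_VERSION_3 ||
	    igps.igps_len != IGPS_VERSION3_LEN)
		return (-1);
	printf("%ju IGMP messages received, %ju bad checksums\n",
	    (uintmax_t)igps.igps_rcv_total, (uintmax_t)igps.igps_rcv_badsum);
	return (0);
}
```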


@ -107,11 +107,9 @@ extern uint32_t ntohl(uint32_t);
extern uint16_t ntohs(uint16_t);
#endif
#if __POSIX_VISIBLE >= 200112
#define IPPROTO_IPV6 41 /* IP6 header */
#define IPPROTO_RAW 255 /* raw IP packet */
#define INET_ADDRSTRLEN 16
#endif
#if __BSD_VISIBLE
/*


@ -0,0 +1,741 @@
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California.
* Copyright (c) 2010-2011 Juniper Networks, Inc.
* All rights reserved.
*
* Portions of this software were developed by Robert N. M. Watson under
* contract to Juniper Networks, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IN_PCB_H_
#define _NETINET_IN_PCB_H_
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_rwlock.h>
#include <net/route.h>
#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <net/vnet.h>
#include <vm/uma.h>
#endif
#define in6pcb inpcb /* for KAME src sync over BSD*'s */
#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
struct inpcbpolicy;
/*
* struct inpcb is the common protocol control block structure used in most
* IP transport protocols.
*
* Pointers to local and foreign host table entries, local and foreign socket
* numbers, and pointers up (to a socket structure) and down (to a
* protocol-specific control block) are stored here.
*/
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
typedef u_quad_t inp_gen_t;
/*
 * A PCB with a null-bound AF_INET6 laddr can receive AF_INET input packets.
* So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
* the following structure.
*/
struct in_addr_4in6 {
u_int32_t ia46_pad32[3];
struct in_addr ia46_addr4;
};
/*
* NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
* some extra padding to accomplish this.
* NOTE 2: tcp_syncache.c uses first 5 32-bit words, which identify fport,
* lport, faddr to generate hash, so these fields shouldn't be moved.
*/
struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
u_int16_t ie_lport; /* local port */
/* protocol dependent part, local and foreign addr */
union {
/* foreign host table entry */
struct in_addr_4in6 ie46_foreign;
struct in6_addr ie6_foreign;
} ie_dependfaddr;
union {
/* local host table entry */
struct in_addr_4in6 ie46_local;
struct in6_addr ie6_local;
} ie_dependladdr;
u_int32_t ie6_zoneid; /* scope zone id */
};
#define ie_faddr ie_dependfaddr.ie46_foreign.ia46_addr4
#define ie_laddr ie_dependladdr.ie46_local.ia46_addr4
#define ie6_faddr ie_dependfaddr.ie6_foreign
#define ie6_laddr ie_dependladdr.ie6_local
/*
* XXX The defines for inc_* are hacks and should be changed to direct
* references.
*/
struct in_conninfo {
u_int8_t inc_flags;
u_int8_t inc_len;
u_int16_t inc_fibnum; /* XXX was pad, 16 bits is plenty */
/* protocol dependent part */
struct in_endpoints inc_ie;
};
/*
* Flags for inc_flags.
*/
#define INC_ISIPV6 0x01
#define inc_isipv6 inc_flags /* temp compatibility */
#define inc_fport inc_ie.ie_fport
#define inc_lport inc_ie.ie_lport
#define inc_faddr inc_ie.ie_faddr
#define inc_laddr inc_ie.ie_laddr
#define inc6_faddr inc_ie.ie6_faddr
#define inc6_laddr inc_ie.ie6_laddr
#define inc6_zoneid inc_ie.ie6_zoneid
struct icmp6_filter;
/*-
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
* IPv6 sockets. In the case of TCP and UDP, further per-connection state is
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
* few fields are protected by multiple locks as indicated in the locking notes
* below. For these fields, all of the listed locks must be write-locked for
* any modifications. However, these fields can be safely read while any one of
* the listed locks are read-locked. This model can permit greater concurrency
* for read operations. For example, connections can be looked up while only
* holding a read lock on the global pcblist lock. This is important for
* performance when attempting to find the connection for a packet given its IP
* and port tuple.
*
* One noteworthy exception is that the global pcbinfo lock follows a different
* set of rules in relation to the inp_list field. Rather than being
* write-locked for modifications and read-locked for list iterations, it must
* be read-locked during modifications and write-locked during list iterations.
* This ensures that the relatively rare global list iterations safely walk a
* stable snapshot of connections while allowing more common list modifications
* to safely grab the pcblist lock just while adding or removing a connection
* from the global list.
*
* Key:
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (l) - Protected by the pcblist lock for the inpcb
* (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
* A few other notes:
*
* When a read lock is held, stability of the field is guaranteed; to write
* to a field, a write lock must generally be held.
*
* netinet/netinet6-layer code should not assume that the inp_socket pointer
* is safe to dereference without inp_lock being held, even for protocols
* other than TCP (where the inpcb persists during TIMEWAIT even after the
* socket has been freed), or there may be close(2)-related races.
*
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
*
* TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
 * read-lock usage during modification; this model can be applied to other
* protocols (especially SCTP).
*/
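
The practical consequence of these rules shows up on the packet-input path: lookups take the pcbinfo/hash locks shared and hand back an inpcb already locked according to the INPLOOKUP_* flag passed in. A hedged, simplified sketch of that pattern follows (not part of this diff; V_udbinfo and the UDP input context are assumed for illustration).

```
/*
 * Hedged sketch of a read-locked lookup on the input path; simplified,
 * not part of this commit.  Assumes an IPv4 UDP datagram and that
 * V_udbinfo is the protocol's inpcbinfo.
 */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

static void
example_udp_deliver(struct mbuf *m, struct ip *ip, struct udphdr *uh,
    struct ifnet *ifp)
{
	struct inpcb *inp;

	inp = in_pcblookup_mbuf(&V_udbinfo, ip->ip_src, uh->uh_sport,
	    ip->ip_dst, uh->uh_dport,
	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, ifp, m);
	if (inp == NULL) {
		m_freem(m);		/* no matching PCB */
		return;
	}
	INP_RLOCK_ASSERT(inp);		/* returned read-locked, per flags */
	/* ... append the datagram to the socket buffer ... */
	INP_RUNLOCK(inp);
}
```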
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
/* (p[w]) for list iteration */
/* (p[r]/l) for addition/removal */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
u_char inp_vflag; /* (i) IP version flag (v4/v6) */
u_char inp_ip_ttl; /* (i) time to live proto */
u_char inp_ip_p; /* (c) protocol proto */
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
uint32_t inp_flowid; /* (x) flow id / queue id */
u_int inp_refcount; /* (i) refcount */
void *inp_pspare[5]; /* (x) packet pacing / general use */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
uint32_t inp_rss_listen_bucket; /* (x) overridden RSS listen bucket */
u_int inp_ispare[4]; /* (x) packet pacing / user cookie /
* general use */
/* Local and foreign ports, local and foreign addr. */
struct in_conninfo inp_inc; /* (i) list for PCB's local port */
/* MAC and IPSEC policy information. */
struct label *inp_label; /* (i) MAC label */
struct inpcbpolicy *inp_sp; /* (s) for IPSEC */
/* Protocol-dependent part; options. */
struct {
u_char inp4_ip_tos; /* (i) type of service proto */
struct mbuf *inp4_options; /* (i) IP options */
struct ip_moptions *inp4_moptions; /* (i) IP mcast options */
} inp_depend4;
struct {
/* (i) IP options */
struct mbuf *inp6_options;
/* (i) IP6 options for outgoing packets */
struct ip6_pktopts *inp6_outputopts;
/* (i) IP multicast options */
struct ip6_moptions *inp6_moptions;
/* (i) ICMPv6 code type filter */
struct icmp6_filter *inp6_icmp6filt;
/* (i) IPV6_CHECKSUM setsockopt */
int inp6_cksum;
short inp6_hops;
} inp_depend6;
LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
struct inpcbport *inp_phd; /* (i/h) head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
struct rwlock inp_lock;
rt_gen_t inp_rt_cookie; /* generation for route entry */
union { /* cached L3 information */
struct route inpu_route;
struct route_in6 inpu_route6;
} inp_rtu;
#define inp_route inp_rtu.inpu_route
#define inp_route6 inp_rtu.inpu_route6
};
#define inp_fport inp_inc.inc_fport
#define inp_lport inp_inc.inc_lport
#define inp_faddr inp_inc.inc_faddr
#define inp_laddr inp_inc.inc_laddr
#define inp_ip_tos inp_depend4.inp4_ip_tos
#define inp_options inp_depend4.inp4_options
#define inp_moptions inp_depend4.inp4_moptions
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
#define in6p_zoneid inp_inc.inc6_zoneid
#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
#define in6p_flowinfo inp_flow
#define in6p_options inp_depend6.inp6_options
#define in6p_outputopts inp_depend6.inp6_outputopts
#define in6p_moptions inp_depend6.inp6_moptions
#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
#define in6p_cksum inp_depend6.inp6_cksum
#define inp_vnet inp_pcbinfo->ipi_vnet
/*
* The range of the generation count, as used in this implementation, is 9e19.
* We would have to create 300 billion connections per second for this number
* to roll over in a year. This seems sufficiently unlikely that we simply
* don't concern ourselves with that possibility.
*/
/*
* Interface exported to userland by various protocols which use inpcbs. Hack
* alert -- only define if struct xsocket is in scope.
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
size_t xi_len; /* length of this structure */
struct inpcb xi_inp;
struct xsocket xi_socket;
u_quad_t xi_alignment_hack;
};
struct xinpgen {
size_t xig_len; /* length of this structure */
u_int xig_count; /* number of PCBs at this time */
inp_gen_t xig_gen; /* generation count at this time */
so_gen_t xig_sogen; /* socket generation count at this time */
};
#endif /* _SYS_SOCKETVAR_H_ */
struct inpcbport {
LIST_ENTRY(inpcbport) phd_hash;
struct inpcbhead phd_pcblist;
u_short phd_port;
};
/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
* Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
* ipi_list_lock:
* - ipi_lock covering the global pcb list stability during loop iteration,
* - ipi_hash_lock covering the hashed lookup tables,
* - ipi_list_lock covering mutable global fields (such as the global
* pcb list)
*
* The lock order is:
*
* ipi_lock (before)
* inpcb locks (before)
* ipi_list locks (before)
* {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
* (l) Locked by ipi_list_lock
* (h) Read using either ipi_hash_lock or inpcb lock; write requires both
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
* Global lock protecting full inpcb list traversal
*/
struct rwlock ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
struct inpcbhead *ipi_listhead; /* (g/l) */
u_int ipi_count; /* (l) */
/*
* Generation count -- incremented each time a connection is allocated
* or freed.
*/
u_quad_t ipi_gencnt; /* (l) */
/*
* Fields associated with port lookup and allocation.
*/
u_short ipi_lastport; /* (x) */
u_short ipi_lastlow; /* (x) */
u_short ipi_lasthi; /* (x) */
/*
* UMA zone from which inpcbs are allocated for this protocol.
*/
struct uma_zone *ipi_zone; /* (c) */
/*
* Connection groups associated with this protocol. These fields are
* constant, but pcbgroup structures themselves are protected by
* per-pcbgroup locks.
*/
struct inpcbgroup *ipi_pcbgroups; /* (c) */
u_int ipi_npcbgroups; /* (c) */
u_int ipi_hashfields; /* (c) */
/*
* Global lock protecting non-pcbgroup hash lookup tables.
*/
struct rwlock ipi_hash_lock;
/*
* Global hash of inpcbs, hashed by local and foreign addresses and
* port numbers.
*/
struct inpcbhead *ipi_hashbase; /* (h) */
u_long ipi_hashmask; /* (h) */
/*
* Global hash of inpcbs, hashed by only local port number.
*/
struct inpcbporthead *ipi_porthashbase; /* (h) */
u_long ipi_porthashmask; /* (h) */
/*
* List of wildcard inpcbs for use with pcbgroups. In the past, was
* per-pcbgroup but is now global. All pcbgroup locks must be held
* to modify the list, so any is sufficient to read it.
*/
struct inpcbhead *ipi_wildbase; /* (p) */
u_long ipi_wildmask; /* (p) */
/*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
/*
* general use 2
*/
void *ipi_pspare[2];
/*
* Global lock protecting global inpcb list, inpcb count, etc.
*/
struct rwlock ipi_list_lock;
};
#ifdef _KERNEL
/*
* Connection groups hold sets of connections that have similar CPU/thread
* affinity. Each connection belongs to exactly one connection group.
*/
struct inpcbgroup {
/*
* Per-connection group hash of inpcbs, hashed by local and foreign
* addresses and port numbers.
*/
struct inpcbhead *ipg_hashbase; /* (c) */
u_long ipg_hashmask; /* (c) */
/*
* Notional affinity of this pcbgroup.
*/
u_int ipg_cpu; /* (p) */
/*
* Per-connection group lock, not to be confused with ipi_lock.
* Protects the hash table hung off the group, but also the global
* wildcard list in inpcbinfo.
*/
struct mtx ipg_lock;
} __aligned(CACHE_LINE_SIZE);
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
#define INP_RLOCK(inp) rw_rlock(&(inp)->inp_lock)
#define INP_WLOCK(inp) rw_wlock(&(inp)->inp_lock)
#define INP_TRY_RLOCK(inp) rw_try_rlock(&(inp)->inp_lock)
#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock)
#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock)
#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock)
#define INP_TRY_UPGRADE(inp) rw_try_upgrade(&(inp)->inp_lock)
#define INP_DOWNGRADE(inp) rw_downgrade(&(inp)->inp_lock)
#define INP_WLOCKED(inp) rw_wowned(&(inp)->inp_lock)
#define INP_LOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_LOCKED)
#define INP_RLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_RLOCKED)
#define INP_WLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_WLOCKED)
#define INP_UNLOCK_ASSERT(inp) rw_assert(&(inp)->inp_lock, RA_UNLOCKED)
/*
* These locking functions are for inpcb consumers outside of sys/netinet,
* more specifically, they were added for the benefit of TOE drivers. The
* macros are reserved for use by the stack.
*/
void inp_wlock(struct inpcb *);
void inp_wunlock(struct inpcb *);
void inp_rlock(struct inpcb *);
void inp_runlock(struct inpcb *);
#ifdef INVARIANTS
void inp_lock_assert(struct inpcb *);
void inp_unlock_assert(struct inpcb *);
#else
static __inline void
inp_lock_assert(struct inpcb *inp __unused)
{
}
static __inline void
inp_unlock_assert(struct inpcb *inp __unused)
{
}
#endif
void inp_apply_all(void (*func)(struct inpcb *, void *), void *arg);
int inp_ip_tos_get(const struct inpcb *inp);
void inp_ip_tos_set(struct inpcb *inp, int val);
struct socket *
inp_inpcbtosocket(struct inpcb *inp);
struct tcpcb *
inp_inpcbtotcpcb(struct inpcb *inp);
void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
uint32_t *faddr, uint16_t *fp);
short inp_so_options(const struct inpcb *inp);
#endif /* _KERNEL */
#define INP_INFO_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
#define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCK(ipi) rw_wlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_lock)
#define INP_INFO_WLOCKED(ipi) rw_wowned(&(ipi)->ipi_lock)
#define INP_INFO_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_lock)
#define INP_INFO_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_LOCKED)
#define INP_INFO_RLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_RLOCKED)
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock)
#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock)
#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock)
#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock)
#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock)
#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock)
#define INP_LIST_LOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
#define INP_LIST_RLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
#define INP_LIST_WLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
#define INP_LIST_UNLOCK_ASSERT(ipi) \
rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
#define INP_HASH_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
#define INP_HASH_RLOCK(ipi) rw_rlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_WLOCK(ipi) rw_wlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_LOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
RA_LOCKED)
#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
RA_WLOCKED)
#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
MTX_DEF | MTX_DUPOK)
#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
(ntohs((lport)) & (mask))
#define INP6_PCBHASHKEY(faddr) ((faddr)->s6_addr32[3])
/*
* Flags for inp_vflags -- historically version flags only
*/
#define INP_IPV4 0x1
#define INP_IPV6 0x2
#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
/*
* Flags for inp_flags.
*/
#define INP_RECVOPTS 0x00000001 /* receive incoming IP options */
#define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */
#define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */
#define INP_HDRINCL 0x00000008 /* user supplies entire IP header */
#define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */
#define INP_LOWPORT 0x00000020 /* user wants "low" port binding */
#define INP_ANONPORT 0x00000040 /* port chosen for user */
#define INP_RECVIF 0x00000080 /* receive incoming interface */
#define INP_MTUDISC 0x00000100 /* user can do MTU discovery */
/* 0x000200 unused: was INP_FAITH */
#define INP_RECVTTL 0x00000400 /* receive incoming IP TTL */
#define INP_DONTFRAG 0x00000800 /* don't fragment packet */
#define INP_BINDANY 0x00001000 /* allow bind to any address */
#define INP_INHASHLIST 0x00002000 /* in_pcbinshash() has been called */
#define INP_RECVTOS 0x00004000 /* receive incoming IP TOS */
#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */
#define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */
#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */
#define IN6P_HOPOPTS 0x00040000 /* receive hop-by-hop options */
#define IN6P_DSTOPTS 0x00080000 /* receive dst options after rthdr */
#define IN6P_RTHDR 0x00100000 /* receive routing header */
#define IN6P_RTHDRDSTOPTS 0x00200000 /* receive dstoptions before rthdr */
#define IN6P_TCLASS 0x00400000 /* receive traffic class value */
#define IN6P_AUTOFLOWLABEL 0x00800000 /* attach flowlabel automatically */
#define INP_TIMEWAIT 0x01000000 /* in TIMEWAIT, ppcb is tcptw */
#define INP_ONESBCAST 0x02000000 /* send all-ones broadcast */
#define INP_DROPPED 0x04000000 /* protocol drop flag */
#define INP_SOCKREF 0x08000000 /* strong socket reference */
#define INP_RESERVED_0 0x10000000 /* reserved field */
#define INP_RESERVED_1 0x20000000 /* reserved field */
#define IN6P_RFC2292 0x40000000 /* used RFC2292 API on the socket */
#define IN6P_MTU 0x80000000 /* receive path MTU */
#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
INP_RECVIF|INP_RECVTTL|INP_RECVTOS|\
IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
IN6P_TCLASS|IN6P_AUTOFLOWLABEL|IN6P_RFC2292|\
IN6P_MTU)
/*
* Flags for inp_flags2.
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
#define INP_REUSEPORT 0x00000008 /* SO_REUSEPORT option is set */
#define INP_FREED 0x00000010 /* inp itself is not valid */
#define INP_REUSEADDR 0x00000020 /* SO_REUSEADDR option is set */
#define INP_BINDMULTI 0x00000040 /* IP_BINDMULTI option is set */
#define INP_RSS_BUCKET_SET 0x00000080 /* IP_RSS_LISTEN_BUCKET is set */
#define INP_RECVFLOWID 0x00000100 /* populate recv datagram with flow info */
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
/*
* Flags passed to in_pcblookup*() functions.
*/
#define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */
#define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */
#define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */
#define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \
INPLOOKUP_WLOCKPCB)
#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */
#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
/*
* Constants for pcbinfo.ipi_hashfields.
*/
#define IPI_HASHFIELDS_NONE 0
#define IPI_HASHFIELDS_2TUPLE 1
#define IPI_HASHFIELDS_4TUPLE 2
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
VNET_DECLARE(int, ipport_lowfirstauto);
VNET_DECLARE(int, ipport_lowlastauto);
VNET_DECLARE(int, ipport_firstauto);
VNET_DECLARE(int, ipport_lastauto);
VNET_DECLARE(int, ipport_hifirstauto);
VNET_DECLARE(int, ipport_hilastauto);
VNET_DECLARE(int, ipport_randomized);
VNET_DECLARE(int, ipport_randomcps);
VNET_DECLARE(int, ipport_randomtime);
VNET_DECLARE(int, ipport_stoprandom);
VNET_DECLARE(int, ipport_tcpallocs);
#define V_ipport_reservedhigh VNET(ipport_reservedhigh)
#define V_ipport_reservedlow VNET(ipport_reservedlow)
#define V_ipport_lowfirstauto VNET(ipport_lowfirstauto)
#define V_ipport_lowlastauto VNET(ipport_lowlastauto)
#define V_ipport_firstauto VNET(ipport_firstauto)
#define V_ipport_lastauto VNET(ipport_lastauto)
#define V_ipport_hifirstauto VNET(ipport_hifirstauto)
#define V_ipport_hilastauto VNET(ipport_hilastauto)
#define V_ipport_randomized VNET(ipport_randomized)
#define V_ipport_randomcps VNET(ipport_randomcps)
#define V_ipport_randomtime VNET(ipport_randomtime)
#define V_ipport_stoprandom VNET(ipport_stoprandom)
#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
int, int, char *, uma_init, uma_fini, uint32_t, u_int);
int in_pcbbind_check_bindmulti(const struct inpcb *ni,
const struct inpcb *oi);
struct inpcbgroup *
in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
struct inpcbgroup *
in_pcbgroup_byinpcb(struct inpcb *);
struct inpcbgroup *
in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
struct in_addr, u_short);
void in_pcbgroup_destroy(struct inpcbinfo *);
int in_pcbgroup_enabled(struct inpcbinfo *);
void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
void in_pcbgroup_remove(struct inpcb *);
void in_pcbgroup_update(struct inpcb *);
void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *,
struct ucred *, int);
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
struct mbuf *);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, in_addr_t *, u_short *, struct inpcb **,
struct ucred *);
void in_pcbdetach(struct inpcb *);
void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
struct ucred *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
struct inpcb *
in_pcblookup(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *);
struct inpcb *
in_pcblookup_mbuf(struct inpcbinfo *, struct in_addr, u_int,
struct in_addr, u_int, int, struct ifnet *, struct mbuf *);
void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr,
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbref(struct inpcb *);
void in_pcbrehash(struct inpcb *);
void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
void in_losing(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
int in_getsockaddr(struct socket *so, struct sockaddr **nam);
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
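
The xinpgen/xinpcb export above is what lets tools such as netstat enumerate connections without poking at kernel memory: the pcblist sysctl returns a leading struct xinpgen, a run of per-connection records each starting with its own length, and a trailing struct xinpgen. A hedged sketch of that walk, using the stock net.inet.udp.pcblist name (F-Stack's netstat performs the same walk through its sysctl proxy):

```
/*
 * Hedged sketch, not part of this commit: walking the UDP pcb list the
 * way netstat does.  Every record begins with a size_t length, so the
 * trailing xinpgen (xig_len == sizeof(struct xinpgen)) ends the loop.
 */
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>	/* struct xsocket, enables xinpcb/xinpgen */
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <stdio.h>
#include <stdlib.h>

static int
walk_udp_pcblist(void)
{
	struct xinpgen *xig;
	char *buf;
	size_t len = 0;

	if (sysctlbyname("net.inet.udp.pcblist", NULL, &len, NULL, 0) == -1)
		return (-1);
	if ((buf = malloc(len)) == NULL)
		return (-1);
	if (sysctlbyname("net.inet.udp.pcblist", buf, &len, NULL, 0) == -1) {
		free(buf);
		return (-1);
	}
	xig = (struct xinpgen *)buf;
	for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
	    xig->xig_len > sizeof(struct xinpgen);
	    xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
		struct xinpcb *xip = (struct xinpcb *)xig;

		printf("local port %u, foreign port %u\n",
		    ntohs(xip->xi_inp.inp_lport), ntohs(xip->xi_inp.inp_fport));
	}
	free(buf);
	return (0);
}
```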


@ -0,0 +1,61 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_systm.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IN_SYSTM_H_
#define _NETINET_IN_SYSTM_H_
/*
* Miscellaneous internetwork
* definitions for kernel.
*/
/*
* Network types.
*
* Internally the system keeps counters in the headers with the bytes
* swapped so that VAX instructions will work on them. It reverses
* the bytes before transmission at each protocol level. The n_ types
* represent the types with the bytes in ``high-ender'' order. Network
 * byte order is usually referred to as big-endian these days rather
* than high-ender, which sadly invokes an Orson Scott Card novel, or
* worse, the movie.
*/
typedef u_int16_t n_short; /* short as received from the net */
typedef u_int32_t n_long; /* long as received from the net */
typedef u_int32_t n_time; /* ms since 00:00 UTC, byte rev */
#ifdef _KERNEL
uint32_t iptime(void);
#endif
#endif


@ -0,0 +1,222 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip.h 8.2 (Berkeley) 6/1/94
* $FreeBSD$
*/
#ifndef _NETINET_IP_H_
#define _NETINET_IP_H_
#include <sys/cdefs.h>
/*
* Definitions for internet protocol version 4.
*
* Per RFC 791, September 1981.
*/
#define IPVERSION 4
/*
* Structure of an internet header, naked of options.
*/
struct ip {
#if BYTE_ORDER == LITTLE_ENDIAN
u_char ip_hl:4, /* header length */
ip_v:4; /* version */
#endif
#if BYTE_ORDER == BIG_ENDIAN
u_char ip_v:4, /* version */
ip_hl:4; /* header length */
#endif
u_char ip_tos; /* type of service */
u_short ip_len; /* total length */
u_short ip_id; /* identification */
u_short ip_off; /* fragment offset field */
#define IP_RF 0x8000 /* reserved fragment flag */
#define IP_DF 0x4000 /* dont fragment flag */
#define IP_MF 0x2000 /* more fragments flag */
#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
u_char ip_ttl; /* time to live */
u_char ip_p; /* protocol */
u_short ip_sum; /* checksum */
struct in_addr ip_src,ip_dst; /* source and dest address */
} __attribute__ ((packed)) __attribute__((__aligned__(2)));
#define IP_MAXPACKET 65535 /* maximum packet size */
/*
* Definitions for IP type of service (ip_tos).
*/
#define IPTOS_LOWDELAY 0x10
#define IPTOS_THROUGHPUT 0x08
#define IPTOS_RELIABILITY 0x04
#define IPTOS_MINCOST 0x02
/*
* Definitions for IP precedence (also in ip_tos) (deprecated).
*/
#define IPTOS_PREC_NETCONTROL IPTOS_DSCP_CS7
#define IPTOS_PREC_INTERNETCONTROL IPTOS_DSCP_CS6
#define IPTOS_PREC_CRITIC_ECP IPTOS_DSCP_CS5
#define IPTOS_PREC_FLASHOVERRIDE IPTOS_DSCP_CS4
#define IPTOS_PREC_FLASH IPTOS_DSCP_CS3
#define IPTOS_PREC_IMMEDIATE IPTOS_DSCP_CS2
#define IPTOS_PREC_PRIORITY IPTOS_DSCP_CS1
#define IPTOS_PREC_ROUTINE IPTOS_DSCP_CS0
/*
* Definitions for DiffServ Codepoints as per RFC2474 and RFC5865.
*/
#define IPTOS_DSCP_CS0 0x00
#define IPTOS_DSCP_CS1 0x20
#define IPTOS_DSCP_AF11 0x28
#define IPTOS_DSCP_AF12 0x30
#define IPTOS_DSCP_AF13 0x38
#define IPTOS_DSCP_CS2 0x40
#define IPTOS_DSCP_AF21 0x48
#define IPTOS_DSCP_AF22 0x50
#define IPTOS_DSCP_AF23 0x58
#define IPTOS_DSCP_CS3 0x60
#define IPTOS_DSCP_AF31 0x68
#define IPTOS_DSCP_AF32 0x70
#define IPTOS_DSCP_AF33 0x78
#define IPTOS_DSCP_CS4 0x80
#define IPTOS_DSCP_AF41 0x88
#define IPTOS_DSCP_AF42 0x90
#define IPTOS_DSCP_AF43 0x98
#define IPTOS_DSCP_CS5 0xa0
#define IPTOS_DSCP_VA 0xb0
#define IPTOS_DSCP_EF 0xb8
#define IPTOS_DSCP_CS6 0xc0
#define IPTOS_DSCP_CS7 0xe0
/*
* ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the
* lower 2 bits of the TOS field.
*/
#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */
#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */
#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */
#define IPTOS_ECN_CE 0x03 /* congestion experienced */
#define IPTOS_ECN_MASK 0x03 /* ECN field mask */
/*
* Definitions for options.
*/
#define IPOPT_COPIED(o) ((o)&0x80)
#define IPOPT_CLASS(o) ((o)&0x60)
#define IPOPT_NUMBER(o) ((o)&0x1f)
#define IPOPT_CONTROL 0x00
#define IPOPT_RESERVED1 0x20
#define IPOPT_DEBMEAS 0x40
#define IPOPT_RESERVED2 0x60
#define IPOPT_EOL 0 /* end of option list */
#define IPOPT_NOP 1 /* no operation */
#define IPOPT_RR 7 /* record packet route */
#define IPOPT_TS 68 /* timestamp */
#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */
#define IPOPT_LSRR 131 /* loose source route */
#define IPOPT_ESO 133 /* extended security */
#define IPOPT_CIPSO 134 /* commercial security */
#define IPOPT_SATID 136 /* satnet id */
#define IPOPT_SSRR 137 /* strict source route */
#define IPOPT_RA 148 /* router alert */
/*
* Offsets to fields in options other than EOL and NOP.
*/
#define IPOPT_OPTVAL 0 /* option ID */
#define IPOPT_OLEN 1 /* option length */
#define IPOPT_OFFSET 2 /* offset within option */
#define IPOPT_MINOFF 4 /* min value of above */
/*
* Time stamp option structure.
*/
struct ip_timestamp {
u_char ipt_code; /* IPOPT_TS */
u_char ipt_len; /* size of structure (variable) */
u_char ipt_ptr; /* index of current entry */
#if BYTE_ORDER == LITTLE_ENDIAN
u_char ipt_flg:4, /* flags, see below */
ipt_oflw:4; /* overflow counter */
#endif
#if BYTE_ORDER == BIG_ENDIAN
u_char ipt_oflw:4, /* overflow counter */
ipt_flg:4; /* flags, see below */
#endif
union ipt_timestamp {
uint32_t ipt_time[1]; /* network format */
struct ipt_ta {
struct in_addr ipt_addr;
uint32_t ipt_time; /* network format */
} ipt_ta[1];
} ipt_timestamp;
};
/* Flag bits for ipt_flg. */
#define IPOPT_TS_TSONLY 0 /* timestamps only */
#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
#define IPOPT_TS_PRESPEC 3 /* specified modules only */
/* Bits for security (not byte swapped). */
#define IPOPT_SECUR_UNCLASS 0x0000
#define IPOPT_SECUR_CONFID 0xf135
#define IPOPT_SECUR_EFTO 0x789a
#define IPOPT_SECUR_MMMM 0xbc4d
#define IPOPT_SECUR_RESTR 0xaf13
#define IPOPT_SECUR_SECRET 0xd788
#define IPOPT_SECUR_TOPSECRET 0x6bc5
/*
* Internet implementation parameters.
*/
#define MAXTTL 255 /* maximum time to live (seconds) */
#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
#define IPFRAGTTL 60 /* time to live for frags, slowhz */
#define IPTTLDEC 1 /* subtracted when forwarding */
#define IP_MSS 576 /* default maximum segment size */
/*
* This is the real IPv4 pseudo header, used for computing the TCP and UDP
* checksums. For the Internet checksum, struct ipovly can be used instead.
* For stronger checksums, the real thing must be used.
*/
struct ippseudo {
struct in_addr ippseudo_src; /* source internet address */
struct in_addr ippseudo_dst; /* destination internet address */
u_char ippseudo_pad; /* pad, must be zero */
u_char ippseudo_p; /* protocol */
u_short ippseudo_len; /* protocol length */
};
#endif
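
Since the header carries its own 16-bit ones-complement checksum in ip_sum, here is a hedged sketch (not part of this commit) of the RFC 1071 computation over the fields defined above: zero ip_sum before generating, and a valid received header sums to 0.

```
/*
 * Hedged sketch, not part of this commit: RFC 1071 checksum over the
 * IPv4 header.  ip_hl counts 32-bit words, so the header holds
 * ip_hl * 2 16-bit words.
 */
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <stdint.h>

static uint16_t
ip_header_cksum(const struct ip *ip)
{
	const uint16_t *w = (const uint16_t *)ip;
	int nwords = ip->ip_hl << 1;
	uint32_t sum = 0;

	while (nwords-- > 0)
		sum += *w++;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carries */
	sum += (sum >> 16);
	return ((uint16_t)~sum);
}
```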


@ -74,7 +74,7 @@ struct carp_header {
u_int16_t carp_cksum;
u_int32_t carp_counter[2];
unsigned char carp_md[20]; /* SHA1 HMAC */
} __packed;
} __attribute__ ((packed));
#ifdef CTASSERT
CTASSERT(sizeof(struct carp_header) == 36);


@ -0,0 +1,222 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IP_ICMP_H_
#define _NETINET_IP_ICMP_H_
/*
 * Internet Control Message Protocol Definitions.
* Per RFC 792, September 1981.
*/
/*
 * Internal structure of an ICMP Router Advertisement
*/
struct icmp_ra_addr {
u_int32_t ira_addr;
u_int32_t ira_preference;
};
/*
* Structure of an icmp header.
*/
struct icmphdr {
u_char icmp_type; /* type of message, see below */
u_char icmp_code; /* type sub code */
u_short icmp_cksum; /* ones complement cksum of struct */
};
/*
* Structure of an icmp packet.
*
* XXX: should start with a struct icmphdr.
*/
struct icmp {
u_char icmp_type; /* type of message, see below */
u_char icmp_code; /* type sub code */
u_short icmp_cksum; /* ones complement cksum of struct */
union {
u_char ih_pptr; /* ICMP_PARAMPROB */
struct in_addr ih_gwaddr; /* ICMP_REDIRECT */
struct ih_idseq {
uint16_t icd_id; /* network format */
uint16_t icd_seq; /* network format */
} ih_idseq;
int ih_void;
/* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */
struct ih_pmtu {
uint16_t ipm_void; /* network format */
uint16_t ipm_nextmtu; /* network format */
} ih_pmtu;
struct ih_rtradv {
u_char irt_num_addrs;
u_char irt_wpa;
u_int16_t irt_lifetime;
} ih_rtradv;
} icmp_hun;
#define icmp_pptr icmp_hun.ih_pptr
#define icmp_gwaddr icmp_hun.ih_gwaddr
#define icmp_id icmp_hun.ih_idseq.icd_id
#define icmp_seq icmp_hun.ih_idseq.icd_seq
#define icmp_void icmp_hun.ih_void
#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void
#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu
#define icmp_num_addrs icmp_hun.ih_rtradv.irt_num_addrs
#define icmp_wpa icmp_hun.ih_rtradv.irt_wpa
#define icmp_lifetime icmp_hun.ih_rtradv.irt_lifetime
union {
struct id_ts { /* ICMP Timestamp */
/*
* The next 3 fields are in network format,
* milliseconds since 00:00 UTC
*/
uint32_t its_otime; /* Originate */
uint32_t its_rtime; /* Receive */
uint32_t its_ttime; /* Transmit */
} id_ts;
struct id_ip {
struct ip idi_ip;
/* options and then 64 bits of data */
} id_ip;
struct icmp_ra_addr id_radv;
u_int32_t id_mask;
char id_data[1];
} icmp_dun;
#define icmp_otime icmp_dun.id_ts.its_otime
#define icmp_rtime icmp_dun.id_ts.its_rtime
#define icmp_ttime icmp_dun.id_ts.its_ttime
#define icmp_ip icmp_dun.id_ip.idi_ip
#define icmp_radv icmp_dun.id_radv
#define icmp_mask icmp_dun.id_mask
#define icmp_data icmp_dun.id_data
};
/*
* Lower bounds on packet lengths for various types.
 * For the error advice packets, we must first ensure that the
* packet is large enough to contain the returned ip header.
* Only then can we do the check to see if 64 bits of packet
* data have been returned, since we need to check the returned
* ip header length.
*/
#define ICMP_MINLEN 8 /* abs minimum */
#define ICMP_TSLEN (8 + 3 * sizeof (uint32_t)) /* timestamp */
#define ICMP_MASKLEN 12 /* address mask */
#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */
#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8)
/* N.B.: must separately check that ip_hl >= 5 */
/* This is the minimum length required by RFC 792. */
/*
* ICMP_ADVLENPREF is the preferred number of bytes which should be contiguous.
 * SCTP needs an additional 12 bytes to be able to access the initiate tag
 * in packets containing an INIT chunk. To also support SCTP/UDP, an
 * additional 8 bytes are needed.
*/
#define ICMP_ADVLENPREF(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8 + 8 + 12)
/*
* Definition of type and code field values.
*/
#define ICMP_ECHOREPLY 0 /* echo reply */
#define ICMP_UNREACH 3 /* dest unreachable, codes: */
#define ICMP_UNREACH_NET 0 /* bad net */
#define ICMP_UNREACH_HOST 1 /* bad host */
#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */
#define ICMP_UNREACH_PORT 3 /* bad port */
#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */
#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */
#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */
#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */
#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */
#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */
#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */
#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */
#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */
#define ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohib */
#define ICMP_UNREACH_HOST_PRECEDENCE 14 /* host prec vio. */
#define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* prec cutoff */
#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */
#define ICMP_REDIRECT 5 /* shorter route, codes: */
#define ICMP_REDIRECT_NET 0 /* for network */
#define ICMP_REDIRECT_HOST 1 /* for host */
#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */
#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */
#define ICMP_ALTHOSTADDR 6 /* alternate host address */
#define ICMP_ECHO 8 /* echo service */
#define ICMP_ROUTERADVERT 9 /* router advertisement */
#define ICMP_ROUTERADVERT_NORMAL 0 /* normal advertisement */
#define ICMP_ROUTERADVERT_NOROUTE_COMMON 16 /* selective routing */
#define ICMP_ROUTERSOLICIT 10 /* router solicitation */
#define ICMP_TIMXCEED 11 /* time exceeded, code: */
#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */
#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */
#define ICMP_PARAMPROB 12 /* ip header bad */
#define ICMP_PARAMPROB_ERRATPTR 0 /* error at param ptr */
#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */
#define ICMP_PARAMPROB_LENGTH 2 /* bad length */
#define ICMP_TSTAMP 13 /* timestamp request */
#define ICMP_TSTAMPREPLY 14 /* timestamp reply */
#define ICMP_IREQ 15 /* information request */
#define ICMP_IREQREPLY 16 /* information reply */
#define ICMP_MASKREQ 17 /* address mask request */
#define ICMP_MASKREPLY 18 /* address mask reply */
#define ICMP_TRACEROUTE 30 /* traceroute */
#define ICMP_DATACONVERR 31 /* data conversion error */
#define ICMP_MOBILE_REDIRECT 32 /* mobile host redirect */
#define ICMP_IPV6_WHEREAREYOU 33 /* IPv6 where-are-you */
#define ICMP_IPV6_IAMHERE 34 /* IPv6 i-am-here */
#define ICMP_MOBILE_REGREQUEST 35 /* mobile registration req */
#define ICMP_MOBILE_REGREPLY 36 /* mobile registration reply */
#define ICMP_SKIP 39 /* SKIP */
#define ICMP_PHOTURIS 40 /* Photuris */
#define ICMP_PHOTURIS_UNKNOWN_INDEX 1 /* unknown sec index */
#define ICMP_PHOTURIS_AUTH_FAILED 2 /* auth failed */
#define ICMP_PHOTURIS_DECRYPT_FAILED 3 /* decrypt failed */
#define ICMP_MAXTYPE 40
#define ICMP_INFOTYPE(type) \
((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \
(type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \
(type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \
(type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \
(type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY)
#ifdef _KERNEL
void icmp_error(struct mbuf *, int, int, uint32_t, int);
int icmp_input(struct mbuf **, int *, int);
int ip_next_mtu(int, int);
#endif
#endif
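
As a hedged illustration of how the accessor macros above are used (not part of this commit), the sketch below fills in an ICMP echo request; icp must point to a buffer of at least ICMP_MINLEN + paylen bytes, and the checksum is the usual ones-complement sum over the whole message.

```
/*
 * Hedged sketch, not part of this commit.  Assumes an even payload
 * length so the ones-complement sum needs no odd-byte handling.
 */
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <stdint.h>
#include <string.h>

static uint16_t
icmp_cksum(const void *data, size_t len)
{
	const uint16_t *w = data;
	uint32_t sum = 0;

	for (; len > 1; len -= 2)
		sum += *w++;
	sum = (sum >> 16) + (sum & 0xffff);
	sum += (sum >> 16);
	return ((uint16_t)~sum);
}

static void
fill_icmp_echo(struct icmp *icp, uint16_t id, uint16_t seq,
    const void *payload, size_t paylen)
{
	icp->icmp_type = ICMP_ECHO;
	icp->icmp_code = 0;
	icp->icmp_id = htons(id);	/* icd_id, network format */
	icp->icmp_seq = htons(seq);	/* icd_seq, network format */
	memcpy(icp->icmp_data, payload, paylen);
	icp->icmp_cksum = 0;
	icp->icmp_cksum = icmp_cksum(icp, ICMP_MINLEN + paylen);
}
```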


@ -0,0 +1,359 @@
/*-
* Copyright (c) 1989 Stephen Deering.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Stephen Deering of Stanford University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_IP_MROUTE_H_
#define _NETINET_IP_MROUTE_H_
/*
* Definitions for IP multicast forwarding.
*
* Written by David Waitzman, BBN Labs, August 1988.
* Modified by Steve Deering, Stanford, February 1989.
* Modified by Ajit Thyagarajan, PARC, August 1993.
* Modified by Ajit Thyagarajan, PARC, August 1994.
* Modified by Ahmed Helmy, SGI, June 1996.
* Modified by Pavlin Radoslavov, ICSI, October 2002.
*
* MROUTING Revision: 3.3.1.3
* and PIM-SMv2 and PIM-DM support, advanced API support,
* bandwidth metering and signaling.
*/
/*
* Multicast Routing set/getsockopt commands.
*/
#define MRT_INIT 100 /* initialize forwarder */
#define MRT_DONE 101 /* shut down forwarder */
#define MRT_ADD_VIF 102 /* create virtual interface */
#define MRT_DEL_VIF 103 /* delete virtual interface */
#define MRT_ADD_MFC 104 /* insert forwarding cache entry */
#define MRT_DEL_MFC 105 /* delete forwarding cache entry */
#define MRT_VERSION 106 /* get kernel version number */
#define MRT_ASSERT 107 /* enable assert processing */
#define MRT_PIM MRT_ASSERT /* enable PIM processing */
#define MRT_API_SUPPORT 109 /* supported MRT API */
#define MRT_API_CONFIG 110 /* config MRT API */
#define MRT_ADD_BW_UPCALL 111 /* create bandwidth monitor */
#define MRT_DEL_BW_UPCALL 112 /* delete bandwidth monitor */
/*
* Types and macros for handling bitmaps with one bit per virtual interface.
*/
#define MAXVIFS 32
typedef u_long vifbitmap_t;
typedef u_short vifi_t; /* type of a vif index */
#define ALL_VIFS (vifi_t)-1
#define VIFM_SET(n, m) ((m) |= (1 << (n)))
#define VIFM_CLR(n, m) ((m) &= ~(1 << (n)))
#define VIFM_ISSET(n, m) ((m) & (1 << (n)))
#define VIFM_CLRALL(m) ((m) = 0x00000000)
#define VIFM_COPY(mfrom, mto) ((mto) = (mfrom))
#define VIFM_SAME(m1, m2) ((m1) == (m2))
struct mfc;
/*
* Argument structure for MRT_ADD_VIF.
* (MRT_DEL_VIF takes a single vifi_t argument.)
*/
struct vifctl {
vifi_t vifc_vifi; /* the index of the vif to be added */
u_char vifc_flags; /* VIFF_ flags defined below */
u_char vifc_threshold; /* min ttl required to forward on vif */
u_int vifc_rate_limit; /* max rate */
struct in_addr vifc_lcl_addr; /* local interface address */
struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
};
#define VIFF_TUNNEL 0x1 /* no-op; retained for old source */
#define VIFF_SRCRT 0x2 /* no-op; retained for old source */
#define VIFF_REGISTER 0x4 /* used for PIM Register encap/decap */
/*
* Argument structure for MRT_ADD_MFC and MRT_DEL_MFC
* XXX if you change this, make sure to change struct mfcctl2 as well.
*/
struct mfcctl {
struct in_addr mfcc_origin; /* ip origin of mcasts */
struct in_addr mfcc_mcastgrp; /* multicast group associated*/
vifi_t mfcc_parent; /* incoming vif */
u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
};
/*
* The new argument structure for MRT_ADD_MFC and MRT_DEL_MFC overlays
* and extends the old struct mfcctl.
*/
struct mfcctl2 {
/* the mfcctl fields */
struct in_addr mfcc_origin; /* ip origin of mcasts */
struct in_addr mfcc_mcastgrp; /* multicast group associated*/
vifi_t mfcc_parent; /* incoming vif */
u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
/* extension fields */
uint8_t mfcc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
struct in_addr mfcc_rp; /* the RP address */
};
/*
* The advanced-API flags.
*
* The MRT_MFC_FLAGS_XXX API flags are also used as flags
* for the mfcc_flags field.
*/
#define MRT_MFC_FLAGS_DISABLE_WRONGVIF (1 << 0) /* disable WRONGVIF signals */
#define MRT_MFC_FLAGS_BORDER_VIF (1 << 1) /* border vif */
#define MRT_MFC_RP (1 << 8) /* enable RP address */
#define MRT_MFC_BW_UPCALL (1 << 9) /* enable bw upcalls */
#define MRT_MFC_FLAGS_ALL (MRT_MFC_FLAGS_DISABLE_WRONGVIF | \
MRT_MFC_FLAGS_BORDER_VIF)
#define MRT_API_FLAGS_ALL (MRT_MFC_FLAGS_ALL | \
MRT_MFC_RP | \
MRT_MFC_BW_UPCALL)
/*
* Structure for installing or delivering an upcall if the
* measured bandwidth is above or below a threshold.
*
* User programs (e.g. daemons) may have a need to know when the
* bandwidth used by some data flow is above or below some threshold.
* This interface allows the userland to specify the threshold (in
* bytes and/or packets) and the measurement interval. Flows are
 * all packets with the same source and destination IP address.
* At the moment the code is only used for multicast destinations
* but there is nothing that prevents its use for unicast.
*
* The measurement interval cannot be shorter than some Tmin (currently, 3s).
* The threshold is set in packets and/or bytes per_interval.
*
* Measurement works as follows:
*
* For >= measurements:
* The first packet marks the start of a measurement interval.
* During an interval we count packets and bytes, and when we
* pass the threshold we deliver an upcall and we are done.
* The first packet after the end of the interval resets the
* count and restarts the measurement.
*
* For <= measurement:
* We start a timer to fire at the end of the interval, and
* then for each incoming packet we count packets and bytes.
* When the timer fires, we compare the value with the threshold,
* schedule an upcall if we are below, and restart the measurement
* (reschedule timer and zero counters).
*/
struct bw_data {
struct timeval b_time;
uint64_t b_packets;
uint64_t b_bytes;
};
struct bw_upcall {
struct in_addr bu_src; /* source address */
struct in_addr bu_dst; /* destination address */
uint32_t bu_flags; /* misc flags (see below) */
#define BW_UPCALL_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
#define BW_UPCALL_UNIT_BYTES (1 << 1) /* threshold (in bytes) */
#define BW_UPCALL_GEQ (1 << 2) /* upcall if bw >= threshold */
#define BW_UPCALL_LEQ (1 << 3) /* upcall if bw <= threshold */
#define BW_UPCALL_DELETE_ALL (1 << 4) /* delete all upcalls for s,d*/
struct bw_data bu_threshold; /* the bw threshold */
struct bw_data bu_measured; /* the measured bw */
};
/* max. number of upcalls to deliver together */
#define BW_UPCALLS_MAX 128
/* min. threshold time interval for bandwidth measurement */
#define BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC 3
#define BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC 0
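Putting the pieces above together, a hedged sketch of how a daemon might install a ">= threshold" bandwidth monitor for one (S,G) pair: MRT_ADD_BW_UPCALL takes a struct bw_upcall, and the kernel later reports threshold crossings as IGMPMSG_BW_UPCALL messages on the same socket (see struct igmpmsg further down). The addresses and the roughly 1 MB-per-interval threshold are placeholders.

```c
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>
#include <arpa/inet.h>
#include <string.h>

/* Hypothetical helper; 's' is the multicast-routing socket. */
static int
install_bw_monitor(int s)
{
	struct bw_upcall req;

	memset(&req, 0, sizeof(req));
	req.bu_src.s_addr = inet_addr("198.51.100.7");   /* placeholder source */
	req.bu_dst.s_addr = inet_addr("239.1.1.1");      /* placeholder group */
	req.bu_flags = BW_UPCALL_UNIT_BYTES | BW_UPCALL_GEQ;
	req.bu_threshold.b_time.tv_sec = BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC;
	req.bu_threshold.b_time.tv_usec = 0;
	req.bu_threshold.b_bytes = 1000000;              /* upcall once >= ~1 MB per interval */
	return (setsockopt(s, IPPROTO_IP, MRT_ADD_BW_UPCALL, &req, sizeof(req)));
}
```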
/*
* The kernel's multicast routing statistics.
*/
struct mrtstat {
uint64_t mrts_mfc_lookups; /* # forw. cache hash table hits */
uint64_t mrts_mfc_misses; /* # forw. cache hash table misses */
uint64_t mrts_upcalls; /* # calls to multicast routing daemon */
uint64_t mrts_no_route; /* no route for packet's origin */
uint64_t mrts_bad_tunnel; /* malformed tunnel options */
uint64_t mrts_cant_tunnel; /* no room for tunnel options */
uint64_t mrts_wrong_if; /* arrived on wrong interface */
uint64_t mrts_upq_ovflw; /* upcall Q overflow */
uint64_t mrts_cache_cleanups; /* # entries with no upcalls */
uint64_t mrts_drop_sel; /* pkts dropped selectively */
uint64_t mrts_q_overflow; /* pkts dropped - Q overflow */
uint64_t mrts_pkt2large; /* pkts dropped - size > BKT SIZE */
uint64_t mrts_upq_sockfull; /* upcalls dropped - socket full */
};
#ifdef _KERNEL
#define MRTSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct mrtstat, mrtstat, name, (val))
#define MRTSTAT_INC(name) MRTSTAT_ADD(name, 1)
#endif
/*
* Argument structure used by mrouted to get src-grp pkt counts
*/
struct sioc_sg_req {
struct in_addr src;
struct in_addr grp;
u_long pktcnt;
u_long bytecnt;
u_long wrong_if;
};
/*
* Argument structure used by mrouted to get vif pkt counts
*/
struct sioc_vif_req {
vifi_t vifi; /* vif number */
u_long icount; /* Input packet count on vif */
u_long ocount; /* Output packet count on vif */
u_long ibytes; /* Input byte count on vif */
u_long obytes; /* Output byte count on vif */
};
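These two request structures are consumed by ioctls rather than socket options; on FreeBSD the corresponding commands are SIOCGETSGCNT and SIOCGETVIFCNT from <sys/sockio.h>. A minimal sketch, with placeholder addresses:

```c
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/sockio.h>
#include <netinet/in.h>
#include <netinet/ip_mroute.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical helper: print the per-(S,G) counters kept by the kernel.
 * 's' can be any IPv4 socket. */
static void
print_sg_counters(int s)
{
	struct sioc_sg_req sg;

	memset(&sg, 0, sizeof(sg));
	sg.src.s_addr = inet_addr("198.51.100.7");   /* placeholder source */
	sg.grp.s_addr = inet_addr("239.1.1.1");      /* placeholder group */
	if (ioctl(s, SIOCGETSGCNT, &sg) == 0)
		printf("pkts %lu bytes %lu wrong_if %lu\n",
		    sg.pktcnt, sg.bytecnt, sg.wrong_if);
}
```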
/*
* The kernel's virtual-interface structure.
*/
struct vif {
u_char v_flags; /* VIFF_ flags defined above */
u_char v_threshold; /* min ttl required to forward on vif*/
struct in_addr v_lcl_addr; /* local interface address */
struct in_addr v_rmt_addr; /* remote address (tunnels only) */
struct ifnet *v_ifp; /* pointer to interface */
u_long v_pkt_in; /* # pkts in on interface */
u_long v_pkt_out; /* # pkts out on interface */
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
};
#ifdef _KERNEL
/*
* The kernel's multicast forwarding cache entry structure
*/
struct mfc {
LIST_ENTRY(mfc) mfc_hash;
struct in_addr mfc_origin; /* IP origin of mcasts */
struct in_addr mfc_mcastgrp; /* multicast group associated*/
vifi_t mfc_parent; /* incoming vif */
u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */
u_long mfc_pkt_cnt; /* pkt count for src-grp */
u_long mfc_byte_cnt; /* byte count for src-grp */
u_long mfc_wrong_if; /* wrong if for src-grp */
int mfc_expire; /* time to clean entry up */
struct timeval mfc_last_assert; /* last time I sent an assert*/
uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */
struct in_addr mfc_rp; /* the RP address */
struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */
u_long mfc_nstall; /* # of packets awaiting mfc */
TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */
};
#endif /* _KERNEL */
/*
* Struct used to communicate from kernel to multicast router
* note the convenient similarity to an IP packet
*/
struct igmpmsg {
uint32_t unused1;
uint32_t unused2;
u_char im_msgtype; /* what type of message */
#define IGMPMSG_NOCACHE 1 /* no MFC in the kernel */
#define IGMPMSG_WRONGVIF 2 /* packet came from wrong interface */
#define IGMPMSG_WHOLEPKT 3 /* PIM pkt for user level encap. */
#define IGMPMSG_BW_UPCALL 4 /* BW monitoring upcall */
u_char im_mbz; /* must be zero */
u_char im_vif; /* vif rec'd on */
u_char unused3;
struct in_addr im_src, im_dst;
};
#ifdef _KERNEL
/*
* Argument structure used for pkt info. while upcall is made
*/
struct rtdetq {
TAILQ_ENTRY(rtdetq) rte_link;
struct mbuf *m; /* A copy of the packet */
struct ifnet *ifp; /* Interface pkt came in on */
vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */
};
#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
#endif /* _KERNEL */
/*
* Structure for measuring the bandwidth and sending an upcall if the
* measured bandwidth is above or below a threshold.
*/
struct bw_meter {
struct bw_meter *bm_mfc_next; /* next bw meter (same mfc) */
struct bw_meter *bm_time_next; /* next bw meter (same time) */
uint32_t bm_time_hash; /* the time hash value */
struct mfc *bm_mfc; /* the corresponding mfc */
uint32_t bm_flags; /* misc flags (see below) */
#define BW_METER_UNIT_PACKETS (1 << 0) /* threshold (in packets) */
#define BW_METER_UNIT_BYTES (1 << 1) /* threshold (in bytes) */
#define BW_METER_GEQ (1 << 2) /* upcall if bw >= threshold */
#define BW_METER_LEQ (1 << 3) /* upcall if bw <= threshold */
#define BW_METER_USER_FLAGS (BW_METER_UNIT_PACKETS | \
BW_METER_UNIT_BYTES | \
BW_METER_GEQ | \
BW_METER_LEQ)
#define BW_METER_UPCALL_DELIVERED (1 << 24) /* upcall was delivered */
struct bw_data bm_threshold; /* the upcall threshold */
struct bw_data bm_measured; /* the measured bw */
struct timeval bm_start_time; /* abs. time */
};
#ifdef _KERNEL
struct sockopt;
extern int (*ip_mrouter_set)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_get)(struct socket *, struct sockopt *);
extern int (*ip_mrouter_done)(void);
extern int (*mrt_ioctl)(u_long, caddr_t, int);
#endif /* _KERNEL */
#endif /* _NETINET_IP_MROUTE_H_ */

View File

@ -0,0 +1,298 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.2 (Berkeley) 1/9/95
* $FreeBSD$
*/
#ifndef _NETINET_IP_VAR_H_
#define _NETINET_IP_VAR_H_
#include <sys/queue.h>
/*
* Overlay for ip header used by other protocols (tcp, udp).
*/
struct ipovly {
u_char ih_x1[9]; /* (unused) */
u_char ih_pr; /* protocol */
u_short ih_len; /* protocol length */
struct in_addr ih_src; /* source internet address */
struct in_addr ih_dst; /* destination internet address */
};
#ifdef _KERNEL
/*
* Ip reassembly queue structure. Each fragment
* being reassembled is attached to one of these structures.
* They are timed out after ipq_ttl drops to 0, and may also
* be reclaimed if memory becomes tight.
*/
struct ipq {
TAILQ_ENTRY(ipq) ipq_list; /* to other reass headers */
u_char ipq_ttl; /* time for reass q to live */
u_char ipq_p; /* protocol of this fragment */
u_short ipq_id; /* sequence id for reassembly */
struct mbuf *ipq_frags; /* to ip headers of fragments */
struct in_addr ipq_src,ipq_dst;
u_char ipq_nfrags; /* # frags in this packet */
struct label *ipq_label; /* MAC label */
};
#endif /* _KERNEL */
/*
* Structure stored in mbuf in inpcb.ip_options
* and passed to ip_output when ip options are in use.
* The actual length of the options (including ipopt_dst)
* is in m_len.
*/
#define MAX_IPOPTLEN 40
struct ipoption {
struct in_addr ipopt_dst; /* first-hop dst if source routed */
char ipopt_list[MAX_IPOPTLEN]; /* options proper */
};
/*
* Structure attached to inpcb.ip_moptions and
* passed to ip_output when IP multicast options are in use.
* This structure is lazy-allocated.
*/
struct ip_moptions {
struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
u_long imo_multicast_vif; /* vif num outgoing multicasts */
u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
u_char imo_multicast_loop; /* 1 => hear sends if a member */
u_short imo_num_memberships; /* no. memberships this socket */
u_short imo_max_memberships; /* max memberships this socket */
struct in_multi **imo_membership; /* group memberships */
struct in_mfilter *imo_mfilters; /* source filters */
STAILQ_ENTRY(ip_moptions) imo_link;
};
struct ipstat {
uint64_t ips_total; /* total packets received */
uint64_t ips_badsum; /* checksum bad */
uint64_t ips_tooshort; /* packet too short */
uint64_t ips_toosmall; /* not enough data */
uint64_t ips_badhlen; /* ip header length < data size */
uint64_t ips_badlen; /* ip length < ip header length */
uint64_t ips_fragments; /* fragments received */
uint64_t ips_fragdropped; /* frags dropped (dups, out of space) */
uint64_t ips_fragtimeout; /* fragments timed out */
uint64_t ips_forward; /* packets forwarded */
uint64_t ips_fastforward; /* packets fast forwarded */
uint64_t ips_cantforward; /* packets rcvd for unreachable dest */
uint64_t ips_redirectsent; /* packets forwarded on same net */
uint64_t ips_noproto; /* unknown or unsupported protocol */
uint64_t ips_delivered; /* datagrams delivered to upper level*/
uint64_t ips_localout; /* total ip packets generated here */
uint64_t ips_odropped; /* lost packets due to nobufs, etc. */
uint64_t ips_reassembled; /* total packets reassembled ok */
uint64_t ips_fragmented; /* datagrams successfully fragmented */
uint64_t ips_ofragments; /* output fragments created */
uint64_t ips_cantfrag; /* don't fragment flag was set, etc. */
uint64_t ips_badoptions; /* error in option processing */
uint64_t ips_noroute; /* packets discarded due to no route */
uint64_t ips_badvers; /* ip version != 4 */
uint64_t ips_rawout; /* total raw ip packets generated */
uint64_t ips_toolong; /* ip length > max ip packet size */
uint64_t ips_notmember; /* multicasts for unregistered grps */
uint64_t ips_nogif; /* no match gif found */
uint64_t ips_badaddr; /* invalid address on header */
};
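These counters are what netstat -s -p ip ultimately prints. On stock FreeBSD they are exported as the net.inet.ip.stats sysctl; presumably the F-Stack netstat added by this commit fetches the same structure through the F-Stack sysctl path. A minimal userland sketch against the standard sysctl:

```c
#include <sys/types.h>
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch: read struct ipstat the way netstat -s -p ip does. */
static void
dump_ip_stats(void)
{
	struct ipstat st;
	size_t len = sizeof(st);

	if (sysctlbyname("net.inet.ip.stats", &st, &len, NULL, 0) == 0) {
		printf("%ju total packets received\n", (uintmax_t)st.ips_total);
		printf("%ju bad header checksums\n", (uintmax_t)st.ips_badsum);
	}
}
```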
#ifdef _KERNEL
#include <sys/counter.h>
#include <net/vnet.h>
VNET_PCPUSTAT_DECLARE(struct ipstat, ipstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
#define IPSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct ipstat, ipstat, name, (val))
#define IPSTAT_SUB(name, val) IPSTAT_ADD(name, -(val))
#define IPSTAT_INC(name) IPSTAT_ADD(name, 1)
#define IPSTAT_DEC(name) IPSTAT_SUB(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_ipstat_inc(int statnum);
#define KMOD_IPSTAT_INC(name) \
kmod_ipstat_inc(offsetof(struct ipstat, name) / sizeof(uint64_t))
void kmod_ipstat_dec(int statnum);
#define KMOD_IPSTAT_DEC(name) \
kmod_ipstat_dec(offsetof(struct ipstat, name) / sizeof(uint64_t))
/* flags passed to ip_output as last parameter */
#define IP_FORWARDING 0x1 /* most of ip header exists */
#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
#define IP_SENDONES 0x4 /* send all-ones broadcast */
#define IP_SENDTOIF 0x8 /* send on specific ifnet */
#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
#define IP_NODEFAULTFLOWID 0x40 /* Don't set the flowid from inp */
#ifdef __NO_STRICT_ALIGNMENT
#define IP_HDR_ALIGNED_P(ip) 1
#else
#define IP_HDR_ALIGNED_P(ip) ((((intptr_t) (ip)) & 3) == 0)
#endif
struct ip;
struct inpcb;
struct route;
struct sockopt;
VNET_DECLARE(int, ip_defttl); /* default IP ttl */
VNET_DECLARE(int, ipforwarding); /* ip forwarding */
#ifdef IPSTEALTH
VNET_DECLARE(int, ipstealth); /* stealth forwarding */
#endif
extern u_char ip_protox[];
VNET_DECLARE(struct socket *, ip_rsvpd); /* reservation protocol daemon*/
VNET_DECLARE(struct socket *, ip_mrouter); /* multicast routing daemon */
extern int (*legal_vif_num)(int);
extern u_long (*ip_mcast_src)(int);
VNET_DECLARE(int, rsvp_on);
VNET_DECLARE(int, drop_redirect);
extern struct pr_usrreqs rip_usrreqs;
#define V_ip_id VNET(ip_id)
#define V_ip_defttl VNET(ip_defttl)
#define V_ipforwarding VNET(ipforwarding)
#ifdef IPSTEALTH
#define V_ipstealth VNET(ipstealth)
#endif
#define V_ip_rsvpd VNET(ip_rsvpd)
#define V_ip_mrouter VNET(ip_mrouter)
#define V_rsvp_on VNET(rsvp_on)
#define V_drop_redirect VNET(drop_redirect)
void inp_freemoptions(struct ip_moptions *);
int inp_getmoptions(struct inpcb *, struct sockopt *);
int inp_setmoptions(struct inpcb *, struct sockopt *);
int ip_ctloutput(struct socket *, struct sockopt *sopt);
void ip_drain(void);
int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags);
void ip_forward(struct mbuf *m, int srcrt);
void ip_init(void);
extern int
(*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
struct ip_moptions *);
int ip_output(struct mbuf *,
struct mbuf *, struct route *, int, struct ip_moptions *,
struct inpcb *);
int ipproto_register(short);
int ipproto_unregister(short);
struct mbuf *
ip_reass(struct mbuf *);
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
void ip_slowtimo(void);
void ip_fillid(struct ip *);
int rip_ctloutput(struct socket *, struct sockopt *);
void rip_ctlinput(int, struct sockaddr *, void *);
void rip_init(void);
int rip_input(struct mbuf **, int *, int);
int rip_output(struct mbuf *, struct socket *, ...);
int ipip_input(struct mbuf **, int *, int);
int rsvp_input(struct mbuf **, int *, int);
int ip_rsvp_init(struct socket *);
int ip_rsvp_done(void);
extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
extern void (*ip_rsvp_force_done)(struct socket *);
extern int (*rsvp_input_p)(struct mbuf **, int *, int);
VNET_DECLARE(struct pfil_head, inet_pfil_hook); /* packet filter hooks */
#define V_inet_pfil_hook VNET(inet_pfil_hook)
void in_delayed_cksum(struct mbuf *m);
/* Hooks for ipfw, dummynet, divert etc. Most are declared in raw_ip.c */
/*
* Reference to an ipfw or packet filter rule that can be carried
* outside critical sections.
* A rule is identified by rulenum:rule_id which is ordered.
* In version chain_id the rule can be found in slot 'slot', so
* we don't need a lookup if chain_id == chain->id.
*
* On exit from the firewall this structure refers to the rule after
* the matching one (slot points to the new rule; rulenum:rule_id-1
* is the matching rule), and additional info (e.g. info often contains
* the insn argument or tablearg in the low 16 bits, in host format).
* On entry, the structure is valid if slot>0, and refers to the starting
* rules. 'info' contains the reason for reinject, e.g. divert port,
* divert direction, and so on.
*/
struct ipfw_rule_ref {
uint32_t slot; /* slot for matching rule */
uint32_t rulenum; /* matching rule number */
uint32_t rule_id; /* matching rule id */
uint32_t chain_id; /* ruleset id */
uint32_t info; /* see below */
};
enum {
IPFW_INFO_MASK = 0x0000ffff,
IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */
IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */
IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */
IPFW_IS_MASK = 0x30000000, /* which source ? */
IPFW_IS_DIVERT = 0x20000000,
IPFW_IS_DUMMYNET =0x10000000,
IPFW_IS_PIPE = 0x08000000, /* pipe=1, queue = 0 */
};
#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
#define MTAG_IPFW_RULE 1262273568 /* rule reference */
#define MTAG_IPFW_CALL 1308397630 /* call stack */
struct ip_fw_args;
typedef int (*ip_fw_chk_ptr_t)(struct ip_fw_args *args);
typedef int (*ip_fw_ctl_ptr_t)(struct sockopt *);
VNET_DECLARE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr);
#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr)
/* Divert hooks. */
extern void (*ip_divert_ptr)(struct mbuf *m, int incoming);
/* ng_ipfw hooks -- XXX make it the same as divert and dummynet */
extern int (*ng_ipfw_input_p)(struct mbuf **, int,
struct ip_fw_args *, int);
extern int (*ip_dn_ctl_ptr)(struct sockopt *);
extern int (*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
#endif /* _KERNEL */
#endif /* !_NETINET_IP_VAR_H_ */

View File

@ -0,0 +1,79 @@
/*-
* Copyright (c) 1998-2000
* University of Southern California/Information Sciences Institute.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NETINET_PIM_VAR_H_
#define _NETINET_PIM_VAR_H_
/*
* Protocol Independent Multicast (PIM),
* kernel variables and implementation-specific definitions.
*
* Written by George Edmond Eddy (Rusty), ISI, February 1998.
* Modified by Pavlin Radoslavov, USC/ISI, May 1998, Aug 1999, October 2000.
* Modified by Hitoshi Asaeda, WIDE, August 1998.
*/
/*
* PIM statistics kept in the kernel
*/
struct pimstat {
uint64_t pims_rcv_total_msgs; /* total PIM messages received */
uint64_t pims_rcv_total_bytes; /* total PIM bytes received */
uint64_t pims_rcv_tooshort; /* rcvd with too few bytes */
uint64_t pims_rcv_badsum; /* rcvd with bad checksum */
uint64_t pims_rcv_badversion; /* rcvd bad PIM version */
uint64_t pims_rcv_registers_msgs; /* rcvd regs. msgs (data only) */
uint64_t pims_rcv_registers_bytes; /* rcvd regs. bytes (data only) */
uint64_t pims_rcv_registers_wrongiif; /* rcvd regs. on wrong iif */
uint64_t pims_rcv_badregisters; /* rcvd invalid registers */
uint64_t pims_snd_registers_msgs; /* sent regs. msgs (data only) */
uint64_t pims_snd_registers_bytes; /* sent regs. bytes (data only) */
};
#ifdef _KERNEL
#define PIMSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct pimstat, pimstat, name, (val))
#define PIMSTAT_INC(name) PIMSTAT_ADD(name, 1)
#endif
/*
* Identifiers for PIM sysctl nodes
*/
#define PIMCTL_STATS 1 /* statistics (read-only) */
#ifdef _KERNEL
int pim_input(struct mbuf **, int *, int);
SYSCTL_DECL(_net_inet_pim);
#endif
#endif /* _NETINET_PIM_VAR_H_ */

View File

@ -0,0 +1,641 @@
/*-
* Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
* Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* a) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* b) Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* c) Neither the name of Cisco Systems, Inc. nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifndef _NETINET_SCTP_H_
#define _NETINET_SCTP_H_
#include <sys/types.h>
#define SCTP_PACKED __attribute__((packed))
/*
* SCTP protocol - RFC4960.
*/
struct sctphdr {
uint16_t src_port; /* source port */
uint16_t dest_port; /* destination port */
uint32_t v_tag; /* verification tag of packet */
uint32_t checksum; /* CRC32C checksum */
/* chunks follow... */
} SCTP_PACKED;
/*
* SCTP Chunks
*/
struct sctp_chunkhdr {
uint8_t chunk_type; /* chunk type */
uint8_t chunk_flags; /* chunk flags */
uint16_t chunk_length; /* chunk length */
/* optional params follow */
} SCTP_PACKED;
/*
* SCTP chunk parameters
*/
struct sctp_paramhdr {
uint16_t param_type; /* parameter type */
uint16_t param_length; /* parameter length */
} SCTP_PACKED;
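As a quick illustration of the wire format the three structures above describe (common header, chunk header, parameter header), a self-contained sketch that decodes the start of an SCTP packet; the buffer is assumed to begin right after the IP header.

```c
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch: dump the SCTP common header and the first chunk header. */
static void
dump_sctp_header(const uint8_t *pkt, size_t len)
{
	struct sctphdr sh;
	struct sctp_chunkhdr ch;

	if (len < sizeof(sh) + sizeof(ch))
		return;
	memcpy(&sh, pkt, sizeof(sh));
	memcpy(&ch, pkt + sizeof(sh), sizeof(ch));
	printf("sctp %u -> %u, vtag 0x%08x, first chunk type %u len %u\n",
	    ntohs(sh.src_port), ntohs(sh.dest_port), (unsigned)ntohl(sh.v_tag),
	    ch.chunk_type, ntohs(ch.chunk_length));
}
```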
/*
* user socket options: socket API defined
*/
/*
* read-write options
*/
#define SCTP_RTOINFO 0x00000001
#define SCTP_ASSOCINFO 0x00000002
#define SCTP_INITMSG 0x00000003
#define SCTP_NODELAY 0x00000004
#define SCTP_AUTOCLOSE 0x00000005
#define SCTP_SET_PEER_PRIMARY_ADDR 0x00000006
#define SCTP_PRIMARY_ADDR 0x00000007
#define SCTP_ADAPTATION_LAYER 0x00000008
/* same as above */
#define SCTP_ADAPTION_LAYER 0x00000008
#define SCTP_DISABLE_FRAGMENTS 0x00000009
#define SCTP_PEER_ADDR_PARAMS 0x0000000a
#define SCTP_DEFAULT_SEND_PARAM 0x0000000b
/* ancillary data/notification interest options */
#define SCTP_EVENTS 0x0000000c /* deprecated */
/* Without this applied we will give V4 and V6 addresses on a V6 socket */
#define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d
#define SCTP_MAXSEG 0x0000000e
#define SCTP_DELAYED_SACK 0x0000000f
#define SCTP_FRAGMENT_INTERLEAVE 0x00000010
#define SCTP_PARTIAL_DELIVERY_POINT 0x00000011
/* authentication support */
#define SCTP_AUTH_CHUNK 0x00000012
#define SCTP_AUTH_KEY 0x00000013
#define SCTP_HMAC_IDENT 0x00000014
#define SCTP_AUTH_ACTIVE_KEY 0x00000015
#define SCTP_AUTH_DELETE_KEY 0x00000016
#define SCTP_USE_EXT_RCVINFO 0x00000017
#define SCTP_AUTO_ASCONF 0x00000018 /* rw */
#define SCTP_MAXBURST 0x00000019 /* rw */
#define SCTP_MAX_BURST 0x00000019 /* rw */
/* assoc level context */
#define SCTP_CONTEXT 0x0000001a /* rw */
/* explicit EOR signalling */
#define SCTP_EXPLICIT_EOR 0x0000001b
#define SCTP_REUSE_PORT 0x0000001c /* rw */
#define SCTP_AUTH_DEACTIVATE_KEY 0x0000001d
#define SCTP_EVENT 0x0000001e
#define SCTP_RECVRCVINFO 0x0000001f
#define SCTP_RECVNXTINFO 0x00000020
#define SCTP_DEFAULT_SNDINFO 0x00000021
#define SCTP_DEFAULT_PRINFO 0x00000022
#define SCTP_PEER_ADDR_THLDS 0x00000023
#define SCTP_REMOTE_UDP_ENCAPS_PORT 0x00000024
#define SCTP_ECN_SUPPORTED 0x00000025
#define SCTP_PR_SUPPORTED 0x00000026
#define SCTP_AUTH_SUPPORTED 0x00000027
#define SCTP_ASCONF_SUPPORTED 0x00000028
#define SCTP_RECONFIG_SUPPORTED 0x00000029
#define SCTP_NRSACK_SUPPORTED 0x00000030
#define SCTP_PKTDROP_SUPPORTED 0x00000031
#define SCTP_MAX_CWND 0x00000032
/*
* read-only options
*/
#define SCTP_STATUS 0x00000100
#define SCTP_GET_PEER_ADDR_INFO 0x00000101
/* authentication support */
#define SCTP_PEER_AUTH_CHUNKS 0x00000102
#define SCTP_LOCAL_AUTH_CHUNKS 0x00000103
#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
#define SCTP_TIMEOUTS 0x00000106
#define SCTP_PR_STREAM_STATUS 0x00000107
#define SCTP_PR_ASSOC_STATUS 0x00000108
/*
* user socket options: BSD implementation specific
*/
/*
* Blocking I/O is enabled on any TCP type socket by default. For the UDP
* model if this is turned on then the socket buffer is shared for send
* resources amongst all associations. The default for the UDP model is that
 * SS_NBIO is set, which means all associations have a separate send
 * limit BUT they will NOT ever BLOCK; instead you will get an error back
* EAGAIN if you try to send too much. If you want the blocking semantics you
* set this option at the cost of sharing one socket send buffer size amongst
* all associations. Peeled off sockets turn this option off and block. But
* since both TCP and peeled off sockets have only one assoc per socket this
* is fine. It probably does NOT make sense to set this on SS_NBIO on a TCP
* model OR peeled off UDP model, but we do allow you to do so. You just use
* the normal syscall to toggle SS_NBIO the way you want.
*
* Blocking I/O is controlled by the SS_NBIO flag on the socket state so_state
* field.
*/
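The "normal syscall" mentioned above is the ordinary non-blocking toggle; a minimal, not SCTP-specific, sketch of flipping it on a one-to-many socket descriptor:

```c
#include <fcntl.h>

/* Sketch: SS_NBIO follows the usual non-blocking flag, so blocking
 * semantics are enabled or disabled like on any other descriptor. */
static int
set_blocking(int fd, int blocking)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags < 0)
		return (-1);
	if (blocking)
		flags &= ~O_NONBLOCK;
	else
		flags |= O_NONBLOCK;
	return (fcntl(fd, F_SETFL, flags));
}
```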
#define SCTP_ENABLE_STREAM_RESET 0x00000900 /* struct
* sctp_assoc_value */
#define SCTP_RESET_STREAMS 0x00000901 /* struct
* sctp_reset_streams */
#define SCTP_RESET_ASSOC 0x00000902 /* sctp_assoc_t */
#define SCTP_ADD_STREAMS 0x00000903 /* struct
* sctp_add_streams */
/* For enable stream reset */
#define SCTP_ENABLE_RESET_STREAM_REQ 0x00000001
#define SCTP_ENABLE_RESET_ASSOC_REQ 0x00000002
#define SCTP_ENABLE_CHANGE_ASSOC_REQ 0x00000004
#define SCTP_ENABLE_VALUE_MASK 0x00000007
/* For reset streams */
#define SCTP_STREAM_RESET_INCOMING 0x00000001
#define SCTP_STREAM_RESET_OUTGOING 0x00000002
/* here on down are more implementation specific */
#define SCTP_SET_DEBUG_LEVEL 0x00001005
#define SCTP_CLR_STAT_LOG 0x00001007
/* CMT ON/OFF socket option */
#define SCTP_CMT_ON_OFF 0x00001200
#define SCTP_CMT_USE_DAC 0x00001201
/* JRS - Pluggable Congestion Control Socket option */
#define SCTP_PLUGGABLE_CC 0x00001202
/* RS - Pluggable Stream Scheduling Socket option */
#define SCTP_PLUGGABLE_SS 0x00001203
#define SCTP_SS_VALUE 0x00001204
#define SCTP_CC_OPTION 0x00001205 /* Options for CC
* modules */
/* For I-DATA */
#define SCTP_INTERLEAVING_SUPPORTED 0x00001206
/* read only */
#define SCTP_GET_SNDBUF_USE 0x00001101
#define SCTP_GET_STAT_LOG 0x00001103
#define SCTP_PCB_STATUS 0x00001104
#define SCTP_GET_NONCE_VALUES 0x00001105
/* Special hook for dynamically setting primary for all assoc's,
* this is a write only option that requires root privilege.
*/
#define SCTP_SET_DYNAMIC_PRIMARY 0x00002001
/* VRF (virtual router feature) and multi-VRF support
* options. VRF's provide splits within a router
* that give the views of multiple routers. A
* standard host, without VRF support, is just
* a single VRF. If VRF's are supported then
* the transport must be VRF aware. This means
* that every socket call coming in must be directed
* within the endpoint to one of the VRF's it belongs
* to. The endpoint, before binding, may select
* the "default" VRF it is in by using a set socket
* option with SCTP_VRF_ID. This will also
* get propagated to the default VRF. Once the
* endpoint binds an address then it CANNOT add
* additional VRF's to become a Multi-VRF endpoint.
*
* Before BINDING additional VRF's can be added with
* the SCTP_ADD_VRF_ID call or deleted with
* SCTP_DEL_VRF_ID.
*
* Associations are ALWAYS contained inside a single
* VRF. They cannot reside in two (or more) VRF's. Incoming
* packets, assuming the router is VRF aware, can always
* tell us what VRF they arrived on. A host not supporting
* any VRF's will find that the packets always arrived on the
* single VRF that the host has.
*
*/
#define SCTP_VRF_ID 0x00003001
#define SCTP_ADD_VRF_ID 0x00003002
#define SCTP_GET_VRF_IDS 0x00003003
#define SCTP_GET_ASOC_VRF 0x00003004
#define SCTP_DEL_VRF_ID 0x00003005
/*
* If you enable packet logging you can get
 * a poor man's ethereal output in binary
* form. Note this is a compile option to
* the kernel, SCTP_PACKET_LOGGING, and
* without it in your kernel you
 * will get an EOPNOTSUPP
*/
#define SCTP_GET_PACKET_LOG 0x00004001
/*
* hidden implementation specific options these are NOT user visible (should
* move out of sctp.h)
*/
/* sctp_bindx() flags as hidden socket options */
#define SCTP_BINDX_ADD_ADDR 0x00008001
#define SCTP_BINDX_REM_ADDR 0x00008002
/* Hidden socket option that gets the addresses */
#define SCTP_GET_PEER_ADDRESSES 0x00008003
#define SCTP_GET_LOCAL_ADDRESSES 0x00008004
/* return the total count in bytes needed to hold all local addresses bound */
#define SCTP_GET_LOCAL_ADDR_SIZE 0x00008005
/* Return the total count in bytes needed to hold the remote address */
#define SCTP_GET_REMOTE_ADDR_SIZE 0x00008006
/* hidden option for connectx */
#define SCTP_CONNECT_X 0x00008007
/* hidden option for connectx_delayed, part of sendx */
#define SCTP_CONNECT_X_DELAYED 0x00008008
#define SCTP_CONNECT_X_COMPLETE 0x00008009
/* hidden socket option based sctp_peeloff */
#define SCTP_PEELOFF 0x0000800a
/* the real worker for sctp_getaddrlen() */
#define SCTP_GET_ADDR_LEN 0x0000800b
/* Debug things that need to be purged */
#define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00
/* JRS - Supported congestion control modules for pluggable
* congestion control
*/
/* Standard TCP Congestion Control */
#define SCTP_CC_RFC2581 0x00000000
/* High Speed TCP Congestion Control (Floyd) */
#define SCTP_CC_HSTCP 0x00000001
/* HTCP Congestion Control */
#define SCTP_CC_HTCP 0x00000002
/* RTCC Congestion Control - RFC2581 plus */
#define SCTP_CC_RTCC 0x00000003
#define SCTP_CC_OPT_RTCC_SETMODE 0x00002000
#define SCTP_CC_OPT_USE_DCCC_ECN 0x00002001
#define SCTP_CC_OPT_STEADY_STEP 0x00002002
#define SCTP_CMT_OFF 0
#define SCTP_CMT_BASE 1
#define SCTP_CMT_RPV1 2
#define SCTP_CMT_RPV2 3
#define SCTP_CMT_MPTCP 4
#define SCTP_CMT_MAX SCTP_CMT_MPTCP
/* RS - Supported stream scheduling modules for pluggable
* stream scheduling
*/
/* Default simple round-robin */
#define SCTP_SS_DEFAULT 0x00000000
/* Real round-robin */
#define SCTP_SS_ROUND_ROBIN 0x00000001
/* Real round-robin per packet */
#define SCTP_SS_ROUND_ROBIN_PACKET 0x00000002
/* Priority */
#define SCTP_SS_PRIORITY 0x00000003
/* Fair Bandwidth */
#define SCTP_SS_FAIR_BANDWITH 0x00000004
/* First-come, first-serve */
#define SCTP_SS_FIRST_COME 0x00000005
/* fragment interleave constants
* setting must be one of these or
* EINVAL returned.
*/
#define SCTP_FRAG_LEVEL_0 0x00000000
#define SCTP_FRAG_LEVEL_1 0x00000001
#define SCTP_FRAG_LEVEL_2 0x00000002
/*
* user state values
*/
#define SCTP_CLOSED 0x0000
#define SCTP_BOUND 0x1000
#define SCTP_LISTEN 0x2000
#define SCTP_COOKIE_WAIT 0x0002
#define SCTP_COOKIE_ECHOED 0x0004
#define SCTP_ESTABLISHED 0x0008
#define SCTP_SHUTDOWN_SENT 0x0010
#define SCTP_SHUTDOWN_RECEIVED 0x0020
#define SCTP_SHUTDOWN_ACK_SENT 0x0040
#define SCTP_SHUTDOWN_PENDING 0x0080
/*
* SCTP operational error codes (user visible)
*/
#define SCTP_CAUSE_NO_ERROR 0x0000
#define SCTP_CAUSE_INVALID_STREAM 0x0001
#define SCTP_CAUSE_MISSING_PARAM 0x0002
#define SCTP_CAUSE_STALE_COOKIE 0x0003
#define SCTP_CAUSE_OUT_OF_RESC 0x0004
#define SCTP_CAUSE_UNRESOLVABLE_ADDR 0x0005
#define SCTP_CAUSE_UNRECOG_CHUNK 0x0006
#define SCTP_CAUSE_INVALID_PARAM 0x0007
#define SCTP_CAUSE_UNRECOG_PARAM 0x0008
#define SCTP_CAUSE_NO_USER_DATA 0x0009
#define SCTP_CAUSE_COOKIE_IN_SHUTDOWN 0x000a
#define SCTP_CAUSE_RESTART_W_NEWADDR 0x000b
#define SCTP_CAUSE_USER_INITIATED_ABT 0x000c
#define SCTP_CAUSE_PROTOCOL_VIOLATION 0x000d
/* Error causes from RFC5061 */
#define SCTP_CAUSE_DELETING_LAST_ADDR 0x00a0
#define SCTP_CAUSE_RESOURCE_SHORTAGE 0x00a1
#define SCTP_CAUSE_DELETING_SRC_ADDR 0x00a2
#define SCTP_CAUSE_ILLEGAL_ASCONF_ACK 0x00a3
#define SCTP_CAUSE_REQUEST_REFUSED 0x00a4
/* Error causes from nat-draft */
#define SCTP_CAUSE_NAT_COLLIDING_STATE 0x00b0
#define SCTP_CAUSE_NAT_MISSING_STATE 0x00b1
/* Error causes from RFC4895 */
#define SCTP_CAUSE_UNSUPPORTED_HMACID 0x0105
/*
* error cause parameters (user visible)
*/
struct sctp_gen_error_cause {
uint16_t code;
uint16_t length;
uint8_t info[];
} SCTP_PACKED;
struct sctp_error_cause {
uint16_t code;
uint16_t length;
/* optional cause-specific info may follow */
} SCTP_PACKED;
struct sctp_error_invalid_stream {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_INVALID_STREAM */
uint16_t stream_id; /* stream id of the DATA in error */
uint16_t reserved;
} SCTP_PACKED;
struct sctp_error_missing_param {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_MISSING_PARAM */
uint32_t num_missing_params; /* number of missing parameters */
uint16_t type[];
} SCTP_PACKED;
struct sctp_error_stale_cookie {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_STALE_COOKIE */
uint32_t stale_time; /* time in usec of staleness */
} SCTP_PACKED;
struct sctp_error_out_of_resource {
	struct sctp_error_cause cause;	/* code=SCTP_CAUSE_OUT_OF_RESC */
} SCTP_PACKED;
struct sctp_error_unresolv_addr {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRESOLVABLE_ADDR */
} SCTP_PACKED;
struct sctp_error_unrecognized_chunk {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNRECOG_CHUNK */
struct sctp_chunkhdr ch;/* header from chunk in error */
} SCTP_PACKED;
struct sctp_error_no_user_data {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_NO_USER_DATA */
uint32_t tsn; /* TSN of the empty data chunk */
} SCTP_PACKED;
struct sctp_error_auth_invalid_hmac {
struct sctp_error_cause cause; /* code=SCTP_CAUSE_UNSUPPORTED_HMACID */
uint16_t hmac_id;
} SCTP_PACKED;
/*
* Main SCTP chunk types we place these here so natd and f/w's in user land
* can find them.
*/
/************0x00 series ***********/
#define SCTP_DATA 0x00
#define SCTP_INITIATION 0x01
#define SCTP_INITIATION_ACK 0x02
#define SCTP_SELECTIVE_ACK 0x03
#define SCTP_HEARTBEAT_REQUEST 0x04
#define SCTP_HEARTBEAT_ACK 0x05
#define SCTP_ABORT_ASSOCIATION 0x06
#define SCTP_SHUTDOWN 0x07
#define SCTP_SHUTDOWN_ACK 0x08
#define SCTP_OPERATION_ERROR 0x09
#define SCTP_COOKIE_ECHO 0x0a
#define SCTP_COOKIE_ACK 0x0b
#define SCTP_ECN_ECHO 0x0c
#define SCTP_ECN_CWR 0x0d
#define SCTP_SHUTDOWN_COMPLETE 0x0e
/* RFC4895 */
#define SCTP_AUTHENTICATION 0x0f
/* EY nr_sack chunk id*/
#define SCTP_NR_SELECTIVE_ACK 0x10
/************0x40 series ***********/
#define SCTP_IDATA 0x40
/************0x80 series ***********/
/* RFC5061 */
#define SCTP_ASCONF_ACK 0x80
/* draft-ietf-stewart-pktdrpsctp */
#define SCTP_PACKET_DROPPED 0x81
/* draft-ietf-stewart-strreset-xxx */
#define SCTP_STREAM_RESET 0x82
/* RFC4820 */
#define SCTP_PAD_CHUNK 0x84
/************0xc0 series ***********/
/* RFC3758 */
#define SCTP_FORWARD_CUM_TSN 0xc0
/* RFC5061 */
#define SCTP_ASCONF 0xc1
#define SCTP_IFORWARD_CUM_TSN 0xc2
/* ABORT and SHUTDOWN COMPLETE FLAG */
#define SCTP_HAD_NO_TCB 0x01
/* Packet dropped flags */
#define SCTP_FROM_MIDDLE_BOX SCTP_HAD_NO_TCB
#define SCTP_BADCRC 0x02
#define SCTP_PACKET_TRUNCATED 0x04
/* Flag for ECN -CWR */
#define SCTP_CWR_REDUCE_OVERRIDE 0x01
#define SCTP_CWR_IN_SAME_WINDOW 0x02
#define SCTP_SAT_NETWORK_MIN 400 /* min ms for RTT to set satellite
* time */
#define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst
* in sat */
/* Data Chunk Specific Flags */
#define SCTP_DATA_FRAG_MASK 0x03
#define SCTP_DATA_MIDDLE_FRAG 0x00
#define SCTP_DATA_LAST_FRAG 0x01
#define SCTP_DATA_FIRST_FRAG 0x02
#define SCTP_DATA_NOT_FRAG 0x03
#define SCTP_DATA_UNORDERED 0x04
#define SCTP_DATA_SACK_IMMEDIATELY 0x08
/* ECN Nonce: SACK Chunk Specific Flags */
#define SCTP_SACK_NONCE_SUM 0x01
/* CMT DAC algorithm SACK flag */
#define SCTP_SACK_CMT_DAC 0x80
/*
* PCB flags (in sctp_flags bitmask).
* Note the features and flags are meant
* for use by netstat.
*/
#define SCTP_PCB_FLAGS_UDPTYPE 0x00000001
#define SCTP_PCB_FLAGS_TCPTYPE 0x00000002
#define SCTP_PCB_FLAGS_BOUNDALL 0x00000004
#define SCTP_PCB_FLAGS_ACCEPTING 0x00000008
#define SCTP_PCB_FLAGS_UNBOUND 0x00000010
#define SCTP_PCB_FLAGS_CLOSE_IP 0x00040000
#define SCTP_PCB_FLAGS_WAS_CONNECTED 0x00080000
#define SCTP_PCB_FLAGS_WAS_ABORTED 0x00100000
/* TCP model support */
#define SCTP_PCB_FLAGS_CONNECTED 0x00200000
#define SCTP_PCB_FLAGS_IN_TCPPOOL 0x00400000
#define SCTP_PCB_FLAGS_DONT_WAKE 0x00800000
#define SCTP_PCB_FLAGS_WAKEOUTPUT 0x01000000
#define SCTP_PCB_FLAGS_WAKEINPUT 0x02000000
#define SCTP_PCB_FLAGS_BOUND_V6 0x04000000
#define SCTP_PCB_FLAGS_BLOCKING_IO 0x08000000
#define SCTP_PCB_FLAGS_SOCKET_GONE 0x10000000
#define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x20000000
#define SCTP_PCB_FLAGS_SOCKET_CANT_READ 0x40000000
/* flags to copy to new PCB */
#define SCTP_PCB_COPY_FLAGS (SCTP_PCB_FLAGS_BOUNDALL|\
SCTP_PCB_FLAGS_WAKEINPUT|\
SCTP_PCB_FLAGS_BOUND_V6)
/*
* PCB Features (in sctp_features bitmask)
*/
#define SCTP_PCB_FLAGS_DO_NOT_PMTUD 0x0000000000000001
#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x0000000000000002 /* deprecated */
#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x0000000000000004
#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x0000000000000008
#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x0000000000000010
#define SCTP_PCB_FLAGS_DO_ASCONF 0x0000000000000020
#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x0000000000000040
#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x0000000000000080
/* socket options */
#define SCTP_PCB_FLAGS_NODELAY 0x0000000000000100
#define SCTP_PCB_FLAGS_AUTOCLOSE 0x0000000000000200
#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x0000000000000400 /* deprecated */
#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x0000000000000800
#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x0000000000001000
#define SCTP_PCB_FLAGS_RECVPEERERR 0x0000000000002000
#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x0000000000004000 /* deprecated */
#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x0000000000008000
#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x0000000000010000
#define SCTP_PCB_FLAGS_PDAPIEVNT 0x0000000000020000
#define SCTP_PCB_FLAGS_AUTHEVNT 0x0000000000040000
#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x0000000000080000
#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x0000000000100000
#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x0000000000400000
#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x0000000000800000
#define SCTP_PCB_FLAGS_MULTIPLE_ASCONFS 0x0000000001000000
#define SCTP_PCB_FLAGS_PORTREUSE 0x0000000002000000
#define SCTP_PCB_FLAGS_DRYEVNT 0x0000000004000000
#define SCTP_PCB_FLAGS_RECVRCVINFO 0x0000000008000000
#define SCTP_PCB_FLAGS_RECVNXTINFO 0x0000000010000000
#define SCTP_PCB_FLAGS_ASSOC_RESETEVNT 0x0000000020000000
#define SCTP_PCB_FLAGS_STREAM_CHANGEEVNT 0x0000000040000000
#define SCTP_PCB_FLAGS_RECVNSENDFAILEVNT 0x0000000080000000
/*-
 * mobility_features parameters (by micchie). Note
* these features are applied against the
* sctp_mobility_features flags.. not the sctp_features
* flags.
*/
#define SCTP_MOBILITY_BASE 0x00000001
#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
#define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU
* discovery */
#undef SCTP_PACKED
#include <netinet/sctp_uio.h>
/* This dictates the size of the packet
* collection buffer. This only applies
* if SCTP_PACKET_LOGGING is enabled in
* your config.
*/
#define SCTP_PACKET_LOG_SIZE 65536
/* Maximum delays and such a user can set for options that
* take ms.
*/
#define SCTP_MAX_SACK_DELAY 500 /* per RFC4960 */
#define SCTP_MAX_HB_INTERVAL 14400000 /* 4 hours in ms */
#define SCTP_MAX_COOKIE_LIFE 3600000 /* 1 hour in ms */
/* Types of logging/KTR tracing that can be enabled via the
* sysctl net.inet.sctp.sctp_logging. You must also enable
* SUBSYS tracing.
* Note that you must have the SCTP option in the kernel
* to enable these as well.
*/
#define SCTP_BLK_LOGGING_ENABLE 0x00000001
#define SCTP_CWND_MONITOR_ENABLE 0x00000002
#define SCTP_CWND_LOGGING_ENABLE 0x00000004
#define SCTP_FLIGHT_LOGGING_ENABLE 0x00000020
#define SCTP_FR_LOGGING_ENABLE 0x00000040
#define SCTP_LOCK_LOGGING_ENABLE 0x00000080
#define SCTP_MAP_LOGGING_ENABLE 0x00000100
#define SCTP_MBCNT_LOGGING_ENABLE 0x00000200
#define SCTP_MBUF_LOGGING_ENABLE 0x00000400
#define SCTP_NAGLE_LOGGING_ENABLE 0x00000800
#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000
#define SCTP_RTTVAR_LOGGING_ENABLE 0x00002000
#define SCTP_SACK_LOGGING_ENABLE 0x00004000
#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000
#define SCTP_SB_LOGGING_ENABLE 0x00010000
#define SCTP_STR_LOGGING_ENABLE 0x00020000
#define SCTP_WAKE_LOGGING_ENABLE 0x00040000
#define SCTP_LOG_MAXBURST_ENABLE 0x00080000
#define SCTP_LOG_RWND_ENABLE 0x00100000
#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000
#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000
#define SCTP_LTRACE_ERROR_ENABLE 0x00800000
#define SCTP_LAST_PACKET_TRACING 0x01000000
#define SCTP_THRESHOLD_LOGGING 0x02000000
#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000
#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000
#define SCTP_LOG_TRY_ADVANCE 0x10000000
#endif /* !_NETINET_SCTP_H_ */

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,261 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_TCP_H_
#define _NETINET_TCP_H_
#include <sys/cdefs.h>
#include <sys/types.h>
#if __BSD_VISIBLE
typedef u_int32_t tcp_seq;
#define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */
#define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */
/*
* TCP header.
* Per RFC 793, September, 1981.
*/
struct tcphdr {
u_short th_sport; /* source port */
u_short th_dport; /* destination port */
tcp_seq th_seq; /* sequence number */
tcp_seq th_ack; /* acknowledgement number */
#if BYTE_ORDER == LITTLE_ENDIAN
u_char th_x2:4, /* (unused) */
th_off:4; /* data offset */
#endif
#if BYTE_ORDER == BIG_ENDIAN
u_char th_off:4, /* data offset */
th_x2:4; /* (unused) */
#endif
u_char th_flags;
#define TH_FIN 0x01
#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_PUSH 0x08
#define TH_ACK 0x10
#define TH_URG 0x20
#define TH_ECE 0x40
#define TH_CWR 0x80
#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"
u_short th_win; /* window */
u_short th_sum; /* checksum */
u_short th_urp; /* urgent pointer */
};
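A small self-contained sketch of reading the header defined above from a captured segment; note that th_off counts 32-bit words, so the full header length (options included) is th_off * 4 bytes.

```c
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch: decode the fixed part of a TCP header from a raw buffer. */
static void
dump_tcp_header(const uint8_t *seg, size_t len)
{
	struct tcphdr th;

	if (len < sizeof(th))
		return;
	memcpy(&th, seg, sizeof(th));
	printf("tcp %u -> %u, seq %u, hdrlen %u, flags 0x%02x\n",
	    ntohs(th.th_sport), ntohs(th.th_dport), (unsigned)ntohl(th.th_seq),
	    (unsigned)th.th_off * 4, (unsigned)th.th_flags);
}
```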
#define TCPOPT_EOL 0
#define TCPOLEN_EOL 1
#define TCPOPT_PAD 0 /* padding after EOL */
#define TCPOLEN_PAD 1
#define TCPOPT_NOP 1
#define TCPOLEN_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOLEN_MAXSEG 4
#define TCPOPT_WINDOW 3
#define TCPOLEN_WINDOW 3
#define TCPOPT_SACK_PERMITTED 4
#define TCPOLEN_SACK_PERMITTED 2
#define TCPOPT_SACK 5
#define TCPOLEN_SACKHDR 2
#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */
#define TCPOPT_TIMESTAMP 8
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
#define TCPOLEN_FAST_OPEN_MIN 6
#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */
/*
* The default maximum segment size (MSS) to be used for new TCP connections
* when path MTU discovery is not enabled.
*
* RFC879 derives the default MSS from the largest datagram size hosts are
* minimally required to handle directly or through IP reassembly minus the
* size of the IP and TCP header. With IPv6 the minimum MTU is specified
* in RFC2460.
*
* For IPv4 the MSS is 576 - sizeof(struct tcpiphdr)
* For IPv6 the MSS is IPV6_MMTU - sizeof(struct ip6_hdr) - sizeof(struct tcphdr)
*
* We use explicit numerical definition here to avoid header pollution.
*/
#define TCP_MSS 536
#define TCP6_MSS 1220
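Spelled out, the arithmetic behind the two explicit constants above (assuming the minimal 20-byte IPv4 header, 40-byte IPv6 header and 20-byte TCP header) is:

```c
#include <assert.h>

int
main(void)
{
	assert(576 - 20 - 20 == 536);    /* TCP_MSS: RFC 879 minimum datagram size */
	assert(1280 - 40 - 20 == 1220);  /* TCP6_MSS: IPV6_MMTU from RFC 2460 */
	return (0);
}
```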
/*
* Limit the lowest MSS we accept for path MTU discovery and the TCP SYN MSS
* option. Allowing low values of MSS can consume significant resources and
* be used to mount a resource exhaustion attack.
* Connections requesting lower MSS values will be rounded up to this value
* and the IP_DF flag will be cleared to allow fragmentation along the path.
*
* See tcp_subr.c tcp_minmss SYSCTL declaration for more comments. Setting
* it to "0" disables the minmss check.
*
* The default value is fine for TCP across the Internet's smallest official
* link MTU (256 bytes for AX.25 packet radio). However, a connection is very
* unlikely to come across such low MTU interfaces these days (anno domini 2003).
*/
#define TCP_MINMSS 216
#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */
#define TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */
#define TCP_MAX_WINSHIFT 14 /* maximum window shift */
#define TCP_MAXBURST 4 /* maximum segments in a burst */
#define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */
#define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr))
/* max space left for options */
#endif /* __BSD_VISIBLE */
/*
* User-settable options (used with setsockopt). These are discrete
* values and are not masked together. Some values appear to be
* bitmasks for historical reasons.
*/
#define TCP_NODELAY 1 /* don't delay send to coalesce packets */
#if __BSD_VISIBLE
#define TCP_MAXSEG 2 /* set maximum segment size */
#define TCP_NOPUSH 4 /* don't push last block of write */
#define TCP_NOOPT 8 /* don't use TCP options */
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
#define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */
#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */
#define TCP_PCAP_OUT 2048 /* number of output packets to keep */
#define TCP_PCAP_IN 4096 /* number of input packets to keep */
#define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
#define TCP_CA_NAME_MAX 16 /* max congestion control name length */
#define TCPI_OPT_TIMESTAMPS 0x01
#define TCPI_OPT_SACK 0x02
#define TCPI_OPT_WSCALE 0x04
#define TCPI_OPT_ECN 0x08
#define TCPI_OPT_TOE 0x10
/*
* The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
* the caller to query certain information about the state of a TCP
* connection. We provide an overlapping set of fields with the Linux
* implementation, but since this is a fixed size structure, room has been
* left for growth. In order to maximize potential future compatibility with
* the Linux API, the same variable names and order have been adopted, and
* padding left to make room for omitted fields in case they are added later.
*
* XXX: This is currently an unstable ABI/API, in that it is expected to
* change.
*/
struct tcp_info {
u_int8_t tcpi_state; /* TCP FSM state. */
u_int8_t __tcpi_ca_state;
u_int8_t __tcpi_retransmits;
u_int8_t __tcpi_probes;
u_int8_t __tcpi_backoff;
u_int8_t tcpi_options; /* Options enabled on conn. */
u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
u_int32_t tcpi_rto; /* Retransmission timeout (usec). */
u_int32_t __tcpi_ato;
u_int32_t tcpi_snd_mss; /* Max segment size for send. */
u_int32_t tcpi_rcv_mss; /* Max segment size for receive. */
u_int32_t __tcpi_unacked;
u_int32_t __tcpi_sacked;
u_int32_t __tcpi_lost;
u_int32_t __tcpi_retrans;
u_int32_t __tcpi_fackets;
/* Times; measurements in usecs. */
u_int32_t __tcpi_last_data_sent;
u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
u_int32_t tcpi_last_data_recv; /* Time since last recv data. */
u_int32_t __tcpi_last_ack_recv;
/* Metrics; variable units. */
u_int32_t __tcpi_pmtu;
u_int32_t __tcpi_rcv_ssthresh;
u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. */
u_int32_t tcpi_rttvar; /* RTT variance in usecs. */
u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */
u_int32_t tcpi_snd_cwnd; /* Send congestion window. */
u_int32_t __tcpi_advmss;
u_int32_t __tcpi_reordering;
u_int32_t __tcpi_rcv_rtt;
u_int32_t tcpi_rcv_space; /* Advertised recv window. */
/* FreeBSD extensions to tcp_info. */
u_int32_t tcpi_snd_wnd; /* Advertised send window. */
u_int32_t tcpi_snd_bwnd; /* No longer used. */
u_int32_t tcpi_snd_nxt; /* Next egress seqno */
u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */
u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
u_int32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
u_int32_t tcpi_rcv_ooopack; /* Out-of-order packets */
u_int32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
/* Padding to grow without breaking ABI. */
u_int32_t __tcpi_pad[26]; /* Padding. */
};
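As the comment above notes, TCP_INFO is queried with getsockopt() on a connected socket; a minimal sketch:

```c
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>

/* Sketch: print a few of the populated tcp_info fields for 'fd',
 * assumed to be an established TCP connection. */
static void
print_tcp_info(int fd)
{
	struct tcp_info ti;
	socklen_t len = sizeof(ti);

	memset(&ti, 0, sizeof(ti));
	if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) == 0)
		printf("state %u rtt %u us cwnd %u snd_mss %u\n",
		    ti.tcpi_state, ti.tcpi_rtt, ti.tcpi_snd_cwnd, ti.tcpi_snd_mss);
}
```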
#endif
#define TCP_FUNCTION_NAME_LEN_MAX 32
struct tcp_function_set {
char function_set_name[TCP_FUNCTION_NAME_LEN_MAX];
uint32_t pcbcnt;
};
#endif /* !_NETINET_TCP_H_ */

View File

@ -0,0 +1,112 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_TCP_FSM_H_
#define _NETINET_TCP_FSM_H_
/*
* TCP FSM state definitions.
*
* Per RFC793, September, 1981.
*/
#define TCP_NSTATES 11
#define TCPS_CLOSED 0 /* closed */
#define TCPS_LISTEN 1 /* listening for connection */
#define TCPS_SYN_SENT 2 /* active, have sent syn */
#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */
/* states < TCPS_ESTABLISHED are those where connections not established */
#define TCPS_ESTABLISHED 4 /* established */
#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */
/* states > TCPS_CLOSE_WAIT are those where user has closed */
#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */
#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */
#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */
/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */
#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */
#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */
/* for KAME src sync over BSD*'s */
#define TCP6_NSTATES TCP_NSTATES
#define TCP6S_CLOSED TCPS_CLOSED
#define TCP6S_LISTEN TCPS_LISTEN
#define TCP6S_SYN_SENT TCPS_SYN_SENT
#define TCP6S_SYN_RECEIVED TCPS_SYN_RECEIVED
#define TCP6S_ESTABLISHED TCPS_ESTABLISHED
#define TCP6S_CLOSE_WAIT TCPS_CLOSE_WAIT
#define TCP6S_FIN_WAIT_1 TCPS_FIN_WAIT_1
#define TCP6S_CLOSING TCPS_CLOSING
#define TCP6S_LAST_ACK TCPS_LAST_ACK
#define TCP6S_FIN_WAIT_2 TCPS_FIN_WAIT_2
#define TCP6S_TIME_WAIT TCPS_TIME_WAIT
#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED)
#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED)
#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT)
#ifdef TCPOUTFLAGS
/*
* Flags used when sending segments in tcp_output. Basic flags (TH_RST,
* TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso
* that TH_FIN is sent only if all data queued for output is included in the
* segment.
*/
static u_char tcp_outflags[TCP_NSTATES] = {
TH_RST|TH_ACK, /* 0, CLOSED */
0, /* 1, LISTEN */
TH_SYN, /* 2, SYN_SENT */
TH_SYN|TH_ACK, /* 3, SYN_RECEIVED */
TH_ACK, /* 4, ESTABLISHED */
TH_ACK, /* 5, CLOSE_WAIT */
TH_FIN|TH_ACK, /* 6, FIN_WAIT_1 */
TH_FIN|TH_ACK, /* 7, CLOSING */
TH_FIN|TH_ACK, /* 8, LAST_ACK */
TH_ACK, /* 9, FIN_WAIT_2 */
TH_ACK, /* 10, TIME_WAIT */
};
#endif
#ifdef KPROF
int tcp_acounts[TCP_NSTATES][PRU_NREQ];
#endif
#ifdef TCPSTATES
static char const * const tcpstates[] = {
"CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
"ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
"LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
};
#endif
#endif

View File

@@ -0,0 +1,95 @@
/*-
* Copyright (c) 1982, 1986, 1993, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
* $FreeBSD$
*/
#ifndef _NETINET_TCP_SEQ_H_
#define _NETINET_TCP_SEQ_H_
/*
* TCP sequence numbers are 32 bit integers operated
* on with modular arithmetic. These macros can be
* used to compare such integers.
*/
#define SEQ_LT(a,b) ((int)((a)-(b)) < 0)
#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0)
#define SEQ_GT(a,b) ((int)((a)-(b)) > 0)
#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0)
#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
/* for modulo comparisons of timestamps */
#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
#define TSTMP_GT(a,b) ((int)((a)-(b)) > 0)
#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
/*
* Macros to initialize tcp sequence numbers for
* send and receive from initial send and receive
* sequence numbers.
*/
#define tcp_rcvseqinit(tp) \
(tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1
#define tcp_sendseqinit(tp) \
(tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
(tp)->snd_recover = (tp)->iss
#ifdef _KERNEL
/*
* Clock macros for RFC 1323 timestamps.
*/
#define TCP_TS_TO_TICKS(_t) ((_t) * hz / 1000)
/* Timestamp wrap-around time, 24 days. */
#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000)
/*
* tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323.
* We always use 1ms granularity independent of hz.
*/
static __inline u_int
tcp_ts_getticks(void)
{
struct timeval tv;
u_long ms;
/*
* getmicrouptime() should be good enough for any 1-1000ms granularity.
* Do not use getmicrotime() here as it might break nfsroot/tcp.
*/
getmicrouptime(&tv);
ms = tv.tv_sec * 1000 + tv.tv_usec / 1000;
return (ms);
}
#endif /* _KERNEL */
#endif /* _NETINET_TCP_SEQ_H_ */
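The SEQ_* comparison macros above compute the difference modulo 2^32 and read it as a signed value; a minimal standalone sketch (a hypothetical test program, not part of the committed header) of how that ordering survives sequence-number wrap:

```
#include <stdio.h>
#include <stdint.h>

/* Same trick as the SEQ_LT()/SEQ_GEQ() macros, with fixed-width types. */
#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)

int
main(void)
{
	uint32_t before_wrap = 0xfffffff0u;	/* just below the 2^32 wrap */
	uint32_t after_wrap  = 0x00000010u;	/* 0x20 bytes later, post-wrap */

	/* A plain unsigned compare orders these the wrong way round... */
	printf("unsigned compare: %d\n", after_wrap < before_wrap);	/* 1 */
	/*
	 * ...while the signed-difference compare still treats before_wrap
	 * as earlier (the cast assumes the usual two's-complement targets).
	 */
	printf("SEQ_LT:  %d\n", SEQ_LT(before_wrap, after_wrap));	/* 1 */
	printf("SEQ_GEQ: %d\n", SEQ_GEQ(after_wrap, before_wrap));	/* 1 */
	return (0);
}
```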

View File

@@ -0,0 +1,208 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_TCP_TIMER_H_
#define _NETINET_TCP_TIMER_H_
/*
* The TCPT_REXMT timer is used to force retransmissions.
* The TCP has the TCPT_REXMT timer set whenever segments
* have been sent for which ACKs are expected but not yet
* received. If an ACK is received which advances tp->snd_una,
* then the retransmit timer is cleared (if there are no more
* outstanding segments) or reset to the base value (if there
* are more ACKs expected). Whenever the retransmit timer goes off,
* we retransmit one unacknowledged segment, and do a backoff
* on the retransmit timer.
*
* The TCPT_PERSIST timer is used to keep window size information
* flowing even if the window goes shut. If all previous transmissions
* have been acknowledged (so that there are no retransmissions in progress),
* and the window is too small to bother sending anything, then we start
* the TCPT_PERSIST timer. When it expires, if the window is nonzero,
* we go to transmit state. Otherwise, at intervals send a single byte
* into the peer's window to force him to update our window information.
* We do this at most as often as TCPT_PERSMIN time intervals,
* but no more frequently than the current estimate of round-trip
* packet time. The TCPT_PERSIST timer is cleared whenever we receive
* a window update from the peer.
*
* The TCPT_KEEP timer is used to keep connections alive. If a
* connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time,
* but not yet established, then we drop the connection. Once the connection
* is established, if the connection is idle for TCPTV_KEEP_IDLE time
* (and keepalives have been enabled on the socket), we begin to probe
* the connection. We force the peer to send us a segment by sending:
* <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK>
* This segment is (deliberately) outside the window, and should elicit
* an ack segment in response from the peer. If, despite the TCPT_KEEP
* initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
* amount of time probing, then we drop the connection.
*/
/*
* Time constants.
*/
#define TCPTV_MSL ( 30*hz) /* max seg lifetime (hah!) */
#define TCPTV_SRTTBASE 0 /* base roundtrip time;
if 0, no idea yet */
#define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */
#define TCPTV_PERSMIN ( 5*hz) /* minimum persist interval */
#define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */
#define TCPTV_KEEP_INIT ( 75*hz) /* initial connect keepalive */
#define TCPTV_KEEP_IDLE (120*60*hz) /* dflt time before probing */
#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
/*
* Minimum retransmit timer is 3 ticks, for algorithmic stability.
* TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
* the expected worst-case processing variances by the kernels
* representing the end points. Such variances do not always show
* up in the srtt because the timestamp is often calculated at
* the interface rather than at the TCP layer. This value is
* typically 50ms. However, it is also possible that delayed
* acks (typically 100ms) could create issues so we set the slop
* to 200ms to try to cover it. Note that, properly speaking,
* delayed-acks should not create a major issue for interactive
* environments which 'P'ush the last segment, at least as
* long as implementations do the required 'at least one ack
* for every two packets' for the non-interactive streaming case.
* (maybe the RTO calculation should use 2*RTT instead of RTT
* to handle the ack-every-other-packet case).
*
* The prior minimum of 1*hz (1 second) badly breaks throughput on any
* networks faster than a modem that has minor (e.g. 1%) packet loss.
*/
#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */
#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */
#define TCP_LINGERTIME 120 /* linger at most 2 minutes */
#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */
#define TCPTV_DELACK ( hz/10 ) /* 100ms timeout */
#ifdef TCPTIMERS
static const char *tcptimers[] =
{ "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK" };
#endif
/*
* Force a time value to be in a certain range.
*/
#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
(tv) = (value) + tcp_rexmit_slop; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
if ((u_long)(tv) > (u_long)(tvmax)) \
(tv) = (tvmax); \
} while(0)
#ifdef _KERNEL
struct xtcp_timer;
struct tcp_timer {
struct callout tt_rexmt; /* retransmit timer */
struct callout tt_persist; /* retransmit persistence */
struct callout tt_keep; /* keepalive */
struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
struct callout tt_delack; /* delayed ACK timer */
uint32_t tt_flags; /* Timers flags */
uint32_t tt_draincnt; /* Count being drained */
};
/*
* Flags for the tt_flags field.
*/
#define TT_DELACK 0x0001
#define TT_REXMT 0x0002
#define TT_PERSIST 0x0004
#define TT_KEEP 0x0008
#define TT_2MSL 0x0010
#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
#define TT_DELACK_RST 0x0100
#define TT_REXMT_RST 0x0200
#define TT_PERSIST_RST 0x0400
#define TT_KEEP_RST 0x0800
#define TT_2MSL_RST 0x1000
#define TT_STOPPED 0x00010000
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
extern int tcp_persmin; /* minimum persist interval */
extern int tcp_persmax; /* maximum persist interval */
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
extern int tcp_syn_backoff[];
extern int tcp_finwait2_timeout;
extern int tcp_fast_finwait2_recycle;
void tcp_timer_init(void);
void tcp_timer_2msl(void *xtp);
void tcp_timer_discard(void *);
struct tcptw *
tcp_tw_2msl_scan(int reuse); /* XXX temporary? */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
void tcp_timer_delack(void *xtp);
void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
struct xtcp_timer *xtimer);
#endif /* _KERNEL */
#endif /* !_NETINET_TCP_TIMER_H_ */
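TCPT_RANGESET() above is the clamp applied to every computed timeout: add the retransmit slop, then force the result into [tvmin, tvmax]. A small userland sketch of the same logic, using illustrative values (hz = 1000 and the 200 ms TCPTV_CPU_VAR slop as a stand-in for the live kernel tunables):

```
#include <stdio.h>

static const unsigned long hz = 1000;	/* assumed tick rate */
static unsigned long tcp_rexmit_slop;	/* stand-in for the kernel tunable */

/* Mirror of the kernel macro, with u_long spelled out. */
#define TCPT_RANGESET(tv, value, tvmin, tvmax) do {		\
	(tv) = (value) + tcp_rexmit_slop;			\
	if ((unsigned long)(tv) < (unsigned long)(tvmin))	\
		(tv) = (tvmin);					\
	if ((unsigned long)(tv) > (unsigned long)(tvmax))	\
		(tv) = (tvmax);					\
} while (0)

int
main(void)
{
	unsigned long rexmt;

	tcp_rexmit_slop = hz / 5;		/* TCPTV_CPU_VAR: 200 ms */

	/* A 1-tick RTO is dominated by the slop: 1 + 200 = 201 ticks. */
	TCPT_RANGESET(rexmt, 1, hz / 33, 64 * hz);
	printf("small value -> %lu ticks\n", rexmt);

	/* A huge backed-off RTO is capped at TCPTV_REXMTMAX (64 * hz). */
	TCPT_RANGESET(rexmt, 1000 * hz, hz / 33, 64 * hz);
	printf("huge value  -> %lu ticks\n", rexmt);
	return (0);
}
```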

View File

@@ -0,0 +1,883 @@
/*-
* Copyright (c) 1982, 1986, 1993, 1994, 1995
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
* $FreeBSD$
*/
#ifndef _NETINET_TCP_VAR_H_
#define _NETINET_TCP_VAR_H_
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#ifdef _KERNEL
#include <net/vnet.h>
#include <sys/mbuf.h>
/*
* Kernel variables for tcp.
*/
VNET_DECLARE(int, tcp_do_rfc1323);
#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
#endif /* _KERNEL */
/* TCP segment queue entry */
struct tseg_qent {
LIST_ENTRY(tseg_qent) tqe_q;
int tqe_len; /* TCP segment data length */
struct tcphdr *tqe_th; /* a pointer to tcp header */
struct mbuf *tqe_m; /* mbuf contains packet */
};
LIST_HEAD(tsegqe_head, tseg_qent);
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
tcp_seq end; /* end seq no. */
};
struct sackhole {
tcp_seq start; /* start seq no. of hole */
tcp_seq end; /* end seq no. */
tcp_seq rxmit; /* next seq. no in hole to be retransmitted */
TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */
};
struct sackhint {
struct sackhole *nexthole;
int sack_bytes_rexmit;
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
int ispare; /* explicit pad for 64bit alignment */
int sacked_bytes; /*
* Total sacked bytes reported by the
* receiver via sack option
*/
uint32_t _pad1[1]; /* TBD */
uint64_t _pad[1]; /* TBD */
};
struct tcptemp {
u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
struct tcphdr tt_t;
};
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
/*
* TODO: We yet need to brave plowing in
* to tcp_input() and the pru_usrreq() block.
* Right now these go to the old standards which
* are somewhat ok, but in the long term may
* need to be changed. If we do tackle tcp_input()
* then we need to get rid of the tcp_do_segment()
* function below.
*/
/* Flags for tcp functions */
#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
struct tcpcb;
struct inpcb;
struct sockopt;
struct socket;
/*
* If defining the optional tcp_timers, in the
* tfb_tcp_timer_stop call you must use the
* callout_async_drain() function with the
* tcp_timer_discard callback. You should check
* the return of callout_async_drain() and if 0
* increment tt_draincnt. Since the timer sub-system
* does not know your callbacks you must provide a
* stop_all function that loops through and calls
* tcp_timer_stop() with each of your defined timers.
*/
struct tcp_function_block {
char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX];
int (*tfb_tcp_output)(struct tcpcb *);
void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *,
int, int, uint8_t,
int);
int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
void (*tfb_tcp_fb_init)(struct tcpcb *);
void (*tfb_tcp_fb_fini)(struct tcpcb *);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
void (*tfb_tcp_timer_activate)(struct tcpcb *,
uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
volatile uint32_t tfb_refcnt;
uint32_t tfb_flags;
};
struct tcp_function {
TAILQ_ENTRY(tcp_function) tf_next;
struct tcp_function_block *tf_fb;
};
TAILQ_HEAD(tcp_funchead, tcp_function);
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
struct tsegqe_head t_segq; /* segment reassembly queue */
void *t_pspare[2]; /* new reassembly queue */
int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
struct tcp_timer *t_timers; /* All the TCP timers in one struct */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
int t_state; /* state of this connection */
u_int t_flags;
struct vnet *t_vnet; /* back pointer to parent vnet */
tcp_seq snd_una; /* sent but unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
* used to recognize retransmits
*/
tcp_seq snd_nxt; /* send next */
tcp_seq snd_up; /* send urgent pointer */
tcp_seq snd_wl1; /* window update seg seq number */
tcp_seq snd_wl2; /* window update seg ack number */
tcp_seq iss; /* initial send sequence number */
tcp_seq irs; /* initial receive sequence number */
tcp_seq rcv_nxt; /* receive next */
tcp_seq rcv_adv; /* advertised window */
u_long rcv_wnd; /* receive window */
tcp_seq rcv_up; /* receive urgent pointer */
u_long snd_wnd; /* send window */
u_long snd_cwnd; /* congestion-controlled window */
u_long snd_spare1; /* unused */
u_long snd_ssthresh; /* snd_cwnd size threshold for
* slow start exponential to
* linear switch
*/
u_long snd_spare2; /* unused */
tcp_seq snd_recover; /* for use in NewReno Fast Recovery */
u_int t_rcvtime; /* inactivity time */
u_int t_starttime; /* time connection was established */
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
u_int t_bw_spare1; /* unused */
tcp_seq t_bw_spare2; /* unused */
int t_rxtcur; /* current retransmit value (ticks) */
u_int t_maxseg; /* maximum segment size */
u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */
int t_srtt; /* smoothed round-trip time */
int t_rttvar; /* variance in round-trip time */
int t_rxtshift; /* log(2) of rexmt exp. backoff */
u_int t_rttmin; /* minimum rtt allowed */
u_int t_rttbest; /* best rtt we've seen */
u_long t_rttupdated; /* number of times rtt sampled */
u_long max_sndwnd; /* largest window peer has offered */
int t_softerror; /* possible error not yet reported */
/* out-of-band data */
char t_oobflags; /* have some */
char t_iobc; /* input character */
/* RFC 1323 variables */
u_char snd_scale; /* window scaling for send window */
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
u_int32_t ts_recent; /* timestamp echo data */
u_int ts_recent_age; /* when last updated */
u_int32_t ts_offset; /* our timestamp offset */
tcp_seq last_ack_sent;
/* experimental */
u_long snd_cwnd_prev; /* cwnd prior to retransmit */
u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
int t_sndzerowin; /* zero-window updates sent */
u_int t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
/* SACK related state */
int snd_numholes; /* number of holes seen by sender */
TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
/* SACK scoreboard (sorted) */
tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/
int rcv_numsacks; /* # distinct sack blks present */
struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
tcp_seq sack_newdata; /* New data xmitted in this recovery
episode starts at this seq number */
struct sackhint sackhint; /* SACK scoreboard hint */
int t_rttlow; /* smallest observed RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
struct toedev *tod; /* toedev handling this connection */
int t_sndrexmitpack; /* retransmit packets sent */
int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */
int t_bytes_acked; /* # bytes acked during current RTT */
struct cc_algo *cc_algo; /* congestion control algorithm */
struct cc_var *ccv; /* congestion control specific vars */
struct osd *osd; /* storage for Khelp module data */
u_int t_keepinit; /* time to establish connection */
u_int t_keepidle; /* time before keepalive probes begin */
u_int t_keepintvl; /* interval between keepalives */
u_int t_keepcnt; /* number of keepalives before close */
u_int t_tsomax; /* TSO total burst length limit in bytes */
u_int t_tsomaxsegcount; /* TSO maximum segment count */
u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */
u_int t_flags2; /* More tcpcb flags storage */
#if defined(_KERNEL) && defined(TCP_RFC7413)
uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */
uint64_t t_tfo_cookie; /* TCP Fast Open cookie */
#else
uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
#endif
struct tcp_function_block *t_fb;/* TCP function call block */
void *t_fb_ptr; /* Pointer to t_fb specific data */
#if defined(_KERNEL) && defined(TCP_RFC7413)
unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */
void *t_pspare2[1]; /* 1 TCP_SIGNATURE */
#else
void *t_pspare2[2]; /* 1 TCP_SIGNATURE, 1 TBD */
#endif
#if defined(_KERNEL) && defined(TCPPCAP)
struct mbufq t_inpkts; /* List of saved input packets. */
struct mbufq t_outpkts; /* List of saved output packets. */
#ifdef _LP64
uint64_t _pad[0]; /* all used! */
#else
uint64_t _pad[2]; /* 2 are available */
#endif /* _LP64 */
#else
uint64_t _pad[6];
#endif /* defined(_KERNEL) && defined(TCPPCAP) */
};
/*
* Flags and utility macros for the t_flags field.
*/
#define TF_ACKNOW 0x000001 /* ack peer immediately */
#define TF_DELACK 0x000002 /* ack, but try to delay it */
#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */
#define TF_NOOPT 0x000008 /* don't use tcp options */
#define TF_SENTFIN 0x000010 /* have sent FIN */
#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */
#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */
#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */
#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */
#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */
#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */
#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
#define TF_NOPUSH 0x001000 /* don't push */
#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
#define TF_LASTIDLE 0x040000 /* connection was previously idle */
#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */
#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
#define TF_FORCEDATA 0x800000 /* force out a byte */
#define TF_TSO 0x1000000 /* TSO enabled on this connection */
#define TF_TOE 0x2000000 /* this connection is offloaded */
#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */
#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */
#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
#define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY)
#define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY
#define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY
#define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY)
#define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY
#define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY
#define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY))
#define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY)
#define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY)
#define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una)
/*
* Flags for the t_oobflags field.
*/
#define TCPOOB_HAVEDATA 0x01
#define TCPOOB_HADDATA 0x02
#ifdef TCP_SIGNATURE
/*
* Defines which are needed by the xform_tcp module and tcp_[in|out]put
* for SADB verification and lookup.
*/
#define TCP_SIGLEN 16 /* length of computed digest in bytes */
#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */
#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */
/*
* Only a single SA per host may be specified at this time. An SPI is
* needed in order for the KEY_ALLOCSA() lookup to work.
*/
#define TCP_SIG_SPI 0x1000
#endif /* TCP_SIGNATURE */
/*
* Flags for PLPMTU handling, t_flags2
*/
#define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */
#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
* to tcp_dooptions and tcp_addoptions.
* The binary order of the to_flags is relevant for packing of the
* options in tcp_addoptions.
*/
struct tcpopt {
u_int32_t to_flags; /* which options are present */
#define TOF_MSS 0x0001 /* maximum segment size */
#define TOF_SCALE 0x0002 /* window scaling */
#define TOF_SACKPERM 0x0004 /* SACK permitted */
#define TOF_TS 0x0010 /* timestamp */
#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
#define TOF_SACK 0x0080 /* Peer sent SACK option */
#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
#define TOF_MAXOPT 0x0200
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
u_char *to_signature; /* pointer to the TCP-MD5 signature */
u_char *to_tfo_cookie; /* pointer to the TFO cookie */
u_int16_t to_mss; /* maximum segment size */
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_int8_t to_tfo_len; /* TFO cookie length */
u_int32_t to_spare; /* UTO */
};
/*
* Flags for tcp_dooptions.
*/
#define TO_SYN 0x01 /* parse SYN-only options */
struct hc_metrics_lite { /* must stay in sync with hc_metrics */
u_long rmx_mtu; /* MTU for this path */
u_long rmx_ssthresh; /* outbound gateway buffer limit */
u_long rmx_rtt; /* estimated round trip time */
u_long rmx_rttvar; /* estimated rtt variance */
u_long rmx_cwnd; /* congestion window */
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
};
/*
* Used by tcp_maxmtu() to communicate interface specific features
* and limits at the time of connection setup.
*/
struct tcp_ifcap {
int ifcap;
u_int tsomax;
u_int tsomaxsegcount;
u_int tsomaxsegsize;
};
#ifndef _NETINET_IN_PCB_H_
struct in_conninfo;
#endif /* _NETINET_IN_PCB_H_ */
struct tcptw {
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
tcp_seq snd_nxt;
tcp_seq rcv_nxt;
tcp_seq iss;
tcp_seq irs;
u_short last_win; /* cached window value */
u_short tw_so_options; /* copy of so_options */
struct ucred *tw_cred; /* user credentials */
u_int32_t t_recent;
u_int32_t ts_offset; /* our timestamp offset */
u_int t_starttime;
int tw_time;
TAILQ_ENTRY(tcptw) tw_2msl;
void *tw_pspare; /* TCP_SIGNATURE */
u_int *tw_spare; /* TCP_SIGNATURE */
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
#define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb)
#define sototcpcb(so) (intotcpcb(sotoinpcb(so)))
/*
* The smoothed round-trip time and estimated variance
* are stored as fixed point numbers scaled by the values below.
* For convenience, these scales are also used in smoothing the average
* (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
* With these scales, srtt has 3 bits to the right of the binary point,
* and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the
* binary point, and is smoothed with an ALPHA of 0.75.
*/
#define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */
#define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */
#define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */
#define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */
#define TCP_DELTA_SHIFT 2 /* see tcp_input.c */
/*
* The initial retransmission should happen at rtt + 4 * rttvar.
* Because of the way we do the smoothing, srtt and rttvar
* will each average +1/2 tick of bias. When we compute
* the retransmit timer, we want 1/2 tick of rounding and
* 1 extra tick because of +-1/2 tick uncertainty in the
* firing of the timer. The bias will give us exactly the
* 1.5 tick we need. But, because the bias is
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
* This version of the macro adapted from a paper by Lawrence
* Brakmo and Larry Peterson which outlines a problem caused
* by insufficient precision in the original implementation,
* which results in inappropriately large RTO values for very
* fast networks.
*/
#define TCP_REXMTVAL(tp) \
max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
+ (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
/*
* TCP statistics.
* Many of these should be kept per connection,
* but that's inconvenient at the moment.
*/
struct tcpstat {
uint64_t tcps_connattempt; /* connections initiated */
uint64_t tcps_accepts; /* connections accepted */
uint64_t tcps_connects; /* connections established */
uint64_t tcps_drops; /* connections dropped */
uint64_t tcps_conndrops; /* embryonic connections dropped */
uint64_t tcps_minmssdrops; /* average minmss too low drops */
uint64_t tcps_closed; /* conn. closed (includes drops) */
uint64_t tcps_segstimed; /* segs where we tried to get rtt */
uint64_t tcps_rttupdated; /* times we succeeded */
uint64_t tcps_delack; /* delayed acks sent */
uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */
uint64_t tcps_rexmttimeo; /* retransmit timeouts */
uint64_t tcps_persisttimeo; /* persist timeouts */
uint64_t tcps_keeptimeo; /* keepalive timeouts */
uint64_t tcps_keepprobe; /* keepalive probes sent */
uint64_t tcps_keepdrops; /* connections dropped in keepalive */
uint64_t tcps_sndtotal; /* total packets sent */
uint64_t tcps_sndpack; /* data packets sent */
uint64_t tcps_sndbyte; /* data bytes sent */
uint64_t tcps_sndrexmitpack; /* data packets retransmitted */
uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */
uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */
uint64_t tcps_sndacks; /* ack-only packets sent */
uint64_t tcps_sndprobe; /* window probes sent */
uint64_t tcps_sndurg; /* packets sent with URG only */
uint64_t tcps_sndwinup; /* window update-only packets sent */
uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */
uint64_t tcps_rcvtotal; /* total packets received */
uint64_t tcps_rcvpack; /* packets received in sequence */
uint64_t tcps_rcvbyte; /* bytes received in sequence */
uint64_t tcps_rcvbadsum; /* packets received with cksum errs */
uint64_t tcps_rcvbadoff; /* packets received with bad offset */
uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */
uint64_t tcps_rcvshort; /* packets received too short */
uint64_t tcps_rcvduppack; /* duplicate-only packets received */
uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */
uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */
uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */
uint64_t tcps_rcvoopack; /* out-of-order packets received */
uint64_t tcps_rcvoobyte; /* out-of-order bytes received */
uint64_t tcps_rcvpackafterwin; /* packets with data after window */
uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */
uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */
uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */
uint64_t tcps_rcvdupack; /* rcvd duplicate acks */
uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */
uint64_t tcps_rcvackpack; /* rcvd ack packets */
uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */
uint64_t tcps_rcvwinupd; /* rcvd window update packets */
uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */
uint64_t tcps_predack; /* times hdr predict ok for acks */
uint64_t tcps_preddat; /* times hdr predict ok for data pkts */
uint64_t tcps_pcbcachemiss;
uint64_t tcps_cachedrtt; /* times cached RTT in route updated */
uint64_t tcps_cachedrttvar; /* times cached rttvar updated */
uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */
uint64_t tcps_usedrtt; /* times RTT initialized from route */
uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */
uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/
uint64_t tcps_persistdrop; /* timeout in persist state */
uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */
uint64_t tcps_mturesent; /* resends due to MTU discovery */
uint64_t tcps_listendrop; /* listen queue overflows */
uint64_t tcps_badrst; /* ignored RSTs in the window */
uint64_t tcps_sc_added; /* entry added to syncache */
uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */
uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */
uint64_t tcps_sc_dropped; /* could not reply to packet */
uint64_t tcps_sc_completed; /* successful extraction of entry */
uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */
uint64_t tcps_sc_reset; /* RST removed entry from syncache */
uint64_t tcps_sc_stale; /* timed out or listen socket gone */
uint64_t tcps_sc_aborted; /* syncache entry aborted */
uint64_t tcps_sc_badack; /* removed due to bad ACK */
uint64_t tcps_sc_unreach; /* ICMP unreachable received */
uint64_t tcps_sc_zonefail; /* zalloc() failed */
uint64_t tcps_sc_sendcookie; /* SYN cookie sent */
uint64_t tcps_sc_recvcookie; /* SYN cookie received */
uint64_t tcps_hc_added; /* entry added to hostcache */
uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
/* SACK related stats */
uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
uint64_t tcps_sack_rexmits; /* SACK rexmit segments */
uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */
uint64_t tcps_ecn_ect0; /* ECN Capable Transport */
uint64_t tcps_ecn_ect1; /* ECN Capable Transport */
uint64_t tcps_ecn_shs; /* ECN successful handshakes */
uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */
/* TCP_SIGNATURE related stats */
uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */
uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */
uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */
uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */
uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */
uint64_t _pad[12]; /* 6 UTO, 6 TBD */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
#ifdef _KERNEL
#define TI_UNLOCKED 1
#define TI_RLOCKED 2
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
#define TCPSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
#define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_tcpstat_inc(int statnum);
#define KMOD_TCPSTAT_INC(name) \
kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
/*
* Running TCP connection count by state.
*/
VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]);
#define V_tcps_states VNET(tcps_states)
#define TCPSTATES_INC(state) counter_u64_add(V_tcps_states[state], 1)
#define TCPSTATES_DEC(state) counter_u64_add(V_tcps_states[state], -1)
/*
* TCP specific helper hook point identifiers.
*/
#define HHOOK_TCP_EST_IN 0
#define HHOOK_TCP_EST_OUT 1
#define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT
struct tcp_hhook_data {
struct tcpcb *tp;
struct tcphdr *th;
struct tcpopt *to;
long len;
int tso;
tcp_seq curack;
};
#endif
/*
* TCB structure exported to user-land via sysctl(3).
* Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
* included. Not all of our clients do.
*/
#if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
struct xtcp_timer {
int tt_rexmt; /* retransmit timer */
int tt_persist; /* retransmit persistence */
int tt_keep; /* keepalive */
int tt_2msl; /* 2*msl TIME_WAIT timer */
int tt_delack; /* delayed ACK timer */
int t_rcvtime; /* Time since last packet received */
};
struct xtcpcb {
size_t xt_len;
struct inpcb xt_inp;
struct tcpcb xt_tp;
struct xsocket xt_socket;
struct xtcp_timer xt_timer;
u_quad_t xt_alignment_hack;
};
#endif
/*
* Identifiers for TCP sysctl nodes
*/
#define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */
#define TCPCTL_MSSDFLT 3 /* MSS default */
#define TCPCTL_STATS 4 /* statistics */
#define TCPCTL_RTTDFLT 5 /* default RTT estimate */
#define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */
#define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */
#define TCPCTL_SENDSPACE 8 /* send buffer space */
#define TCPCTL_RECVSPACE 9 /* receive buffer space */
#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */
#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */
#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */
#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */
#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
#define TCPCTL_DROP 15 /* drop tcp connection */
#define TCPCTL_STATES 16 /* connection counts by TCP state */
#ifdef _KERNEL
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
SYSCTL_DECL(_net_inet_tcp_sack);
MALLOC_DECLARE(M_TCPLOG);
#endif
VNET_DECLARE(struct inpcbhead, tcb); /* queue of active tcpcb's */
VNET_DECLARE(struct inpcbinfo, tcbinfo);
extern int tcp_log_in_vain;
VNET_DECLARE(int, tcp_mssdflt); /* XXX */
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_delack_enabled);
VNET_DECLARE(int, tcp_do_rfc3390);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_sendspace);
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, path_mtu_discovery);
VNET_DECLARE(int, tcp_do_rfc3465);
VNET_DECLARE(int, tcp_abc_l_var);
#define V_tcb VNET(tcb)
#define V_tcbinfo VNET(tcbinfo)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_delack_enabled VNET(tcp_delack_enabled)
#define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_sendspace VNET(tcp_sendspace)
#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
#define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465)
#define V_tcp_abc_l_var VNET(tcp_abc_l_var)
VNET_DECLARE(int, tcp_do_sack); /* SACK enabled/disabled */
VNET_DECLARE(int, tcp_sc_rst_sock_fail); /* RST on sock alloc failure */
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail)
VNET_DECLARE(int, tcp_do_ecn); /* TCP ECN enabled/disabled */
VNET_DECLARE(int, tcp_ecn_maxretries);
#define V_tcp_do_ecn VNET(tcp_do_ecn)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
#define V_tcp_hhh VNET(tcp_hhh)
VNET_DECLARE(int, tcp_do_rfc6675_pipe);
#define V_tcp_do_rfc6675_pipe VNET(tcp_do_rfc6675_pipe)
int tcp_addoptions(struct tcpopt *, u_char *);
int tcp_ccalgounload(struct cc_algo *unload_algo);
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
tcp_drop(struct tcpcb *, int);
void tcp_drain(void);
void tcp_init(void);
void tcp_fini(void *);
char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
const void *);
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
const void *);
int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_global_init(void);
void tcp_reass_flush(struct tcpcb *);
void tcp_dooptions(struct tcpopt *, u_char *, int, int);
void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
struct tcpcb *, int, int);
void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
void tcp_xmit_timer(struct tcpcb *, int);
void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
void cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
uint16_t type);
void cc_conn_init(struct tcpcb *tp);
void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
void hhook_run_tcp_est_in(struct tcpcb *tp,
struct tcphdr *th, struct tcpopt *to);
int tcp_input(struct mbuf **, int *, int);
void tcp_do_segment(struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, int, int, uint8_t,
int);
int register_tcp_functions(struct tcp_function_block *blk, int wait);
int deregister_tcp_functions(struct tcp_function_block *blk);
struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
u_long tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
u_long tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
u_int tcp_maxseg(const struct tcpcb *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
struct tcp_ifcap *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
tcp_drop_syn_sent(struct inpcb *, int);
struct tcpcb *
tcp_newtcpcb(struct inpcb *);
int tcp_output(struct tcpcb *);
void tcp_state_change(struct tcpcb *, int);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
void tcp_tw_init(void);
#ifdef VIMAGE
void tcp_tw_destroy(void);
#endif
void tcp_tw_zone_change(void);
int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
struct mbuf *, int);
void tcp_setpersist(struct tcpcb *);
#ifdef TCP_SIGNATURE
struct secasvar;
struct secasvar *tcp_get_sav(struct mbuf *, u_int);
int tcp_signature_do_compute(struct mbuf *, int, int, u_char *,
struct secasvar *);
int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *,
struct tcphdr *, u_int);
int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen,
struct tcpopt *to, struct tcphdr *th, u_int tcpbflag);
#endif
void tcp_slowtimo(void);
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
int tcp_timer_active(struct tcpcb *, uint32_t);
void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
void tcp_hc_init(void);
#ifdef VIMAGE
void tcp_hc_destroy(void);
#endif
void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *);
u_long tcp_hc_getmtu(struct in_conninfo *);
void tcp_hc_updatemtu(struct in_conninfo *, u_long);
void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
tcp_seq tcp_new_isn(struct tcpcb *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
u_long tcp_seq_subtract(u_long, u_long );
int tcp_compute_pipe(struct tcpcb *);
static inline void
tcp_fields_to_host(struct tcphdr *th)
{
th->th_seq = ntohl(th->th_seq);
th->th_ack = ntohl(th->th_ack);
th->th_win = ntohs(th->th_win);
th->th_urp = ntohs(th->th_urp);
}
#ifdef TCP_SIGNATURE
static inline void
tcp_fields_to_net(struct tcphdr *th)
{
th->th_seq = htonl(th->th_seq);
th->th_ack = htonl(th->th_ack);
th->th_win = htons(th->th_win);
th->th_urp = htons(th->th_urp);
}
#endif
#endif /* _KERNEL */
#endif /* _NETINET_TCP_VAR_H_ */
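With t_srtt kept scaled by TCP_RTT_SCALE (32) and t_rttvar by TCP_RTTVAR_SCALE (16), the TCP_REXMTVAL() expression above reduces to the familiar srtt + 4 * rttvar in ticks. A short sketch with made-up sample values (not taken from a real connection) that walks the shifts:

```
#include <stdio.h>

#define TCP_RTT_SHIFT		5	/* t_srtt is ticks << 5 (scale 32) */
#define TCP_RTTVAR_SHIFT	4	/* t_rttvar is ticks << 4 (scale 16) */
#define TCP_DELTA_SHIFT		2

int
main(void)
{
	/* Hypothetical state: srtt = 10 ticks, rttvar = 3 ticks, min = 3. */
	int t_srtt = 10 << TCP_RTT_SHIFT;	/* 320 */
	int t_rttvar = 3 << TCP_RTTVAR_SHIFT;	/* 48 */
	int t_rttmin = 3;
	int rexmt;

	rexmt = ((t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) + t_rttvar)
	    >> TCP_DELTA_SHIFT;
	if (rexmt < t_rttmin)
		rexmt = t_rttmin;

	/* ((320 >> 3) + 48) >> 2 = (40 + 48) >> 2 = 22 = 10 + 4 * 3 ticks. */
	printf("TCP_REXMTVAL = %d ticks\n", rexmt);
	return (0);
}
```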

View File

@@ -0,0 +1,59 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)tcpip.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_TCPIP_H_
#define _NETINET_TCPIP_H_
/*
* Tcp+ip header, after ip options removed.
*/
struct tcpiphdr {
struct ipovly ti_i; /* overlaid ip structure */
struct tcphdr ti_t; /* tcp header */
};
#define ti_x1 ti_i.ih_x1
#define ti_pr ti_i.ih_pr
#define ti_len ti_i.ih_len
#define ti_src ti_i.ih_src
#define ti_dst ti_i.ih_dst
#define ti_sport ti_t.th_sport
#define ti_dport ti_t.th_dport
#define ti_seq ti_t.th_seq
#define ti_ack ti_t.th_ack
#define ti_x2 ti_t.th_x2
#define ti_off ti_t.th_off
#define ti_flags ti_t.th_flags
#define ti_win ti_t.th_win
#define ti_sum ti_t.th_sum
#define ti_urp ti_t.th_urp
#endif

View File

@@ -0,0 +1,69 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)udp.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_UDP_H_
#define _NETINET_UDP_H_
/*
* UDP protocol header.
* Per RFC 768, September, 1981.
*/
struct udphdr {
u_short uh_sport; /* source port */
u_short uh_dport; /* destination port */
u_short uh_ulen; /* udp length */
u_short uh_sum; /* udp checksum */
};
/*
* User-settable options (used with setsockopt).
*/
#define UDP_ENCAP 1
/* Start of reserved space for third-party user-settable options. */
#define UDP_VENDOR SO_VENDOR
/*
* UDP Encapsulation of IPsec Packets options.
*/
/* Encapsulation types. */
#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */
/* Default ESP in UDP encapsulation port. */
#define UDP_ENCAP_ESPINUDP_PORT 500
/* Maximum UDP fragment size for ESP over UDP. */
#define UDP_ENCAP_ESPINUDP_MAXFRAGLEN 552
#endif

View File

@@ -0,0 +1,184 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NETINET_UDP_VAR_H_
#define _NETINET_UDP_VAR_H_
/*
* UDP kernel structures and variables.
*/
struct udpiphdr {
struct ipovly ui_i; /* overlaid ip structure */
struct udphdr ui_u; /* udp header */
};
#define ui_x1 ui_i.ih_x1
#define ui_v ui_i.ih_x1[0]
#define ui_pr ui_i.ih_pr
#define ui_len ui_i.ih_len
#define ui_src ui_i.ih_src
#define ui_dst ui_i.ih_dst
#define ui_sport ui_u.uh_sport
#define ui_dport ui_u.uh_dport
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum
struct inpcb;
struct mbuf;
typedef void(*udp_tun_func_t)(struct mbuf *, int, struct inpcb *,
const struct sockaddr *, void *);
typedef void(*udp_tun_icmp_t)(int, struct sockaddr *, void *, void *);
/*
* UDP control block; one per udp.
*/
struct udpcb {
udp_tun_func_t u_tun_func; /* UDP kernel tunneling callback. */
udp_tun_icmp_t u_icmp_func; /* UDP kernel tunneling icmp callback */
u_int u_flags; /* Generic UDP flags. */
uint16_t u_rxcslen; /* Coverage for incoming datagrams. */
uint16_t u_txcslen; /* Coverage for outgoing datagrams. */
void *u_tun_ctx; /* Tunneling callback context. */
};
#define intoudpcb(ip) ((struct udpcb *)(ip)->inp_ppcb)
#define sotoudpcb(so) (intoudpcb(sotoinpcb(so)))
/* IPsec: ESP in UDP tunneling: */
#define UF_ESPINUDP_NON_IKE 0x00000001 /* w/ non-IKE marker .. */
/* .. per draft-ietf-ipsec-nat-t-ike-0[01],
* and draft-ietf-ipsec-udp-encaps-(00/)01.txt */
#define UF_ESPINUDP 0x00000002 /* w/ non-ESP marker. */
struct udpstat {
/* input statistics: */
uint64_t udps_ipackets; /* total input packets */
uint64_t udps_hdrops; /* packet shorter than header */
uint64_t udps_badsum; /* checksum error */
uint64_t udps_nosum; /* no checksum */
uint64_t udps_badlen; /* data length larger than packet */
uint64_t udps_noport; /* no socket on port */
uint64_t udps_noportbcast; /* of above, arrived as broadcast */
uint64_t udps_fullsock; /* not delivered, input socket full */
uint64_t udpps_pcbcachemiss; /* input packets missing pcb cache */
uint64_t udpps_pcbhashmiss; /* input packets not for hashed pcb */
/* output statistics: */
uint64_t udps_opackets; /* total output packets */
uint64_t udps_fastout; /* output packets on fast path */
/* of no socket on port, arrived as multicast */
uint64_t udps_noportmcast;
uint64_t udps_filtermcast; /* blocked by multicast filter */
};
#ifdef _KERNEL
#include <sys/counter.h>
VNET_PCPUSTAT_DECLARE(struct udpstat, udpstat);
/*
* In-kernel consumers can use these accessor macros directly to update
* stats.
*/
#define UDPSTAT_ADD(name, val) \
VNET_PCPUSTAT_ADD(struct udpstat, udpstat, name, (val))
#define UDPSTAT_INC(name) UDPSTAT_ADD(name, 1)
/*
* Kernel module consumers must use this accessor macro.
*/
void kmod_udpstat_inc(int statnum);
#define KMOD_UDPSTAT_INC(name) \
kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t))
#endif
/*
* Identifiers for UDP sysctl nodes.
*/
#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
#define UDPCTL_STATS 2 /* statistics (read-only) */
#define UDPCTL_MAXDGRAM 3 /* max datagram size */
#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
#ifdef _KERNEL
#include <netinet/in_pcb.h>
SYSCTL_DECL(_net_inet_udp);
extern struct pr_usrreqs udp_usrreqs;
VNET_DECLARE(struct inpcbhead, udb);
VNET_DECLARE(struct inpcbinfo, udbinfo);
VNET_DECLARE(struct inpcbhead, ulitecb);
VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
#define V_udb VNET(udb)
#define V_udbinfo VNET(udbinfo)
#define V_ulitecb VNET(ulitecb)
#define V_ulitecbinfo VNET(ulitecbinfo)
extern u_long udp_sendspace;
extern u_long udp_recvspace;
VNET_DECLARE(int, udp_cksum);
VNET_DECLARE(int, udp_blackhole);
#define V_udp_cksum VNET(udp_cksum)
#define V_udp_blackhole VNET(udp_blackhole)
extern int udp_log_in_vain;
static __inline struct inpcbinfo *
udp_get_inpcbinfo(int protocol)
{
return (protocol == IPPROTO_UDP) ? &V_udbinfo : &V_ulitecbinfo;
}
static __inline struct inpcbhead *
udp_get_pcblist(int protocol)
{
return (protocol == IPPROTO_UDP) ? &V_udb : &V_ulitecb;
}
int udp_newudpcb(struct inpcb *);
void udp_discardcb(struct udpcb *);
void udp_ctlinput(int, struct sockaddr *, void *);
void udplite_ctlinput(int, struct sockaddr *, void *);
int udp_ctloutput(struct socket *, struct sockopt *);
void udp_init(void);
void udplite_init(void);
int udp_input(struct mbuf **, int *, int);
void udplite_input(struct mbuf *, int);
struct inpcb *udp_notify(struct inpcb *inp, int errno);
int udp_shutdown(struct socket *so);
int udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f,
udp_tun_icmp_t i, void *ctx);
#endif /* _KERNEL */
#endif /* _NETINET_UDP_VAR_H_ */

View File

@@ -0,0 +1,50 @@
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nlist.h 8.2 (Berkeley) 1/21/94
*
* $FreeBSD$
*/
#ifndef _NLIST_H_
#define _NLIST_H_
#include <sys/nlist_aout.h>
#include <sys/cdefs.h>
__BEGIN_DECLS
int nlist(const char *, struct nlist *);
__END_DECLS
#endif /* !_NLIST_H_ */

View File

@ -0,0 +1,58 @@
/*-
* Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Copyright (c) 2008 Nokia Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__BITSET_H_
#define _SYS__BITSET_H_
/*
* Macros addressing the word and the bit within it, tuned so the compiler
* can optimize the cases where SETSIZE fits into a single machine word.
*/
#define _BITSET_BITS (sizeof(long) * 8)
#define __howmany(x, y) (((x) + ((y) - 1)) / (y))
#define __bitset_words(_s) (__howmany(_s, _BITSET_BITS))
#define BITSET_DEFINE(t, _s) \
struct t { \
long __bits[__bitset_words((_s))]; \
}
/*
* Helper to declare a bitset without its size being a constant.
*
* Sadly we cannot declare a bitset struct with '__bits[]', because it's
* the only member of the struct and the compiler complains.
*/
#define BITSET_DEFINE_VAR(t) BITSET_DEFINE(t, 1)
#endif /* !_SYS__BITSET_H_ */
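
For illustration, a hedged sketch of how BITSET_DEFINE() declares a fixed-size set type; the `examplemask` name is hypothetical, and cpuset_t (see sys/_cpuset.h below) is the real in-tree consumer:

```c
#include <sys/_bitset.h>

/* Hypothetical: a 128-bit set stored in __bitset_words(128) longs. */
BITSET_DEFINE(examplemask, 128);	/* declares "struct examplemask" */
typedef struct examplemask examplemask_t;
```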

View File

@ -0,0 +1,65 @@
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)callout.h 8.2 (Berkeley) 1/21/94
* $FreeBSD$
*/
#ifndef _SYS__CALLOUT_H
#define _SYS__CALLOUT_H
#include <sys/queue.h>
struct lock_object;
LIST_HEAD(callout_list, callout);
SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
struct callout {
union {
LIST_ENTRY(callout) le;
SLIST_ENTRY(callout) sle;
TAILQ_ENTRY(callout) tqe;
} c_links;
sbintime_t c_time; /* ticks to the event */
sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
struct lock_object *c_lock; /* lock to handle */
short c_flags; /* User State */
short c_iflags; /* Internal State */
volatile int c_cpu; /* CPU we're scheduled on */
};
#endif

View File

@ -0,0 +1,51 @@
/*-
* Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Copyright (c) 2008 Nokia Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__CPUSET_H_
#define _SYS__CPUSET_H_
#include <sys/_types.h>
#include <sys/_bitset.h>
#ifdef _KERNEL
#define CPU_SETSIZE MAXCPU
#endif
#define CPU_MAXSIZE 256
#ifndef CPU_SETSIZE
#define CPU_SETSIZE CPU_MAXSIZE
#endif
BITSET_DEFINE(_cpuset, CPU_SETSIZE);
typedef struct _cpuset cpuset_t;
#endif /* !_SYS__CPUSET_H_ */

View File

@ -0,0 +1,41 @@
/*-
* Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Berkeley Software Design Inc's name may not be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__LOCK_H_
#define _SYS__LOCK_H_
struct lock_object {
const char *lo_name; /* Individual lock name. */
u_int lo_flags;
u_int lo_data; /* General class specific data. */
struct witness *lo_witness; /* Data for witness. */
};
#endif /* !_SYS__LOCK_H_ */

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2010 Kip Macy All rights reserved.
* Copyright (c) 2013 Patrick Kelsey. All rights reserved.
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _SYS__MUTEX_H_
#define _SYS__MUTEX_H_
/*
* Sleep/spin mutex
*/
struct mtx {
struct lock_object lock_object;
void* mtx_lock;
};
/*
* Members of struct mtx_padalign must mirror members of struct mtx.
* mtx_padalign mutexes can use the mtx(9) API transparently without
* modification.
*/
struct mtx_padalign {
struct lock_object lock_object;
void* mtx_lock;
} __attribute__((__aligned__(64)));
#endif

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2010 Kip Macy All rights reserved.
* Copyright (c) 2013 Patrick Kelsey. All rights reserved.
* Copyright (C) 2017 THL A29 Limited, a Tencent company.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _SYS__RWLOCK_H_
#define _SYS__RWLOCK_H_
/*
* Reader/writer lock.
*/
struct rwlock {
struct lock_object lock_object;
void* rw_lock;
};
struct rwlock_padalign {
struct lock_object lock_object;
void* rw_lock;
} __attribute__((__aligned__(64)));
#endif

View File

@ -0,0 +1,42 @@
/*-
* Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice(s), this list of conditions and the following disclaimer as
* the first lines of this file unmodified other than the possible
* addition of one or more copyright notices.
* 2. Redistributions in binary form must reproduce the above copyright
* notice(s), this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__SX_H_
#define _SYS__SX_H_
/*
* Shared/exclusive lock main structure definition.
*/
struct sx {
struct lock_object lock_object;
void* sx_lock;
};
#endif /* !_SYS__SX_H_ */

View File

@ -0,0 +1,73 @@
/*-
* Copyright (c) 2000 Doug Rabson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__TASK_H_
#define _SYS__TASK_H_
#include <sys/queue.h>
/*
* Each task includes a function which is called from
* taskqueue_run(). The first argument is taken from the 'ta_context'
* field of struct task and the second argument is a count of how many
* times the task was enqueued before the call to taskqueue_run().
*
* List of locks
* (c) const after init
* (q) taskqueue lock
*/
typedef void task_fn_t(void *context, int pending);
typedef void gtask_fn_t(void *context);
struct task {
STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
uint16_t ta_pending; /* (q) count times queued */
u_short ta_priority; /* (c) Priority */
task_fn_t *ta_func; /* (c) task handler */
void *ta_context; /* (c) argument for handler */
};
struct gtask {
STAILQ_ENTRY(gtask) ta_link; /* (q) link for queue */
uint16_t ta_flags; /* (q) state flags */
u_short ta_priority; /* (c) Priority */
gtask_fn_t *ta_func; /* (c) task handler */
void *ta_context; /* (c) argument for handler */
};
struct grouptask {
struct gtask gt_task;
void *gt_taskqueue;
LIST_ENTRY(grouptask) gt_list;
void *gt_uniq;
char *gt_name;
int16_t gt_irq;
int16_t gt_cpu;
};
#endif /* !_SYS__TASK_H_ */
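
A hedged sketch of the usual pattern built on struct task: TASK_INIT() and taskqueue_enqueue() live in sys/taskqueue.h, which is not part of this hunk, so the calls below are assumptions and the names are invented:

```c
#include <sys/param.h>
#include <sys/_task.h>
#include <sys/taskqueue.h>	/* assumed available; not in this diff */

static struct task example_task;

static void
example_task_fn(void *context, int pending)
{
	/* 'pending' counts how many enqueues were coalesced into this run. */
	(void)context;
	(void)pending;
}

static void
example_task_setup(void)
{
	TASK_INIT(&example_task, 0, example_task_fn, NULL);
	taskqueue_enqueue(taskqueue_thread, &example_task);
}
```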

View File

@ -80,7 +80,41 @@ typedef int __ct_rune_t; /* arg type for ctype funcs */
typedef __ct_rune_t __rune_t; /* rune_t (see above) */
typedef __ct_rune_t __wint_t; /* wint_t (see above) */
typedef __uint32_t __fixpt_t; /* fixed point number */
typedef __int64_t sbintime_t;
typedef __uint64_t __vm_offset_t;
typedef __int64_t __vm_ooffset_t;
typedef __uint64_t __vm_paddr_t;
typedef __uint64_t __vm_pindex_t;
typedef __uint64_t __vm_size_t;
typedef __vm_offset_t vm_offset_t;
typedef __vm_ooffset_t vm_ooffset_t;
typedef __vm_paddr_t vm_paddr_t;
typedef __vm_pindex_t vm_pindex_t;
typedef __vm_size_t vm_size_t;
typedef __cpuwhich_t cpuwhich_t;
typedef __cpulevel_t cpulevel_t;
typedef __cpusetid_t cpusetid_t;
#ifndef _UID_T_DECLARED
typedef __uid_t uid_t; /* user id */
#define _UID_T_DECLARED
#endif
typedef __int64_t __segsz_t; /* segment size (in pages) */
typedef __segsz_t segsz_t; /* segment size (in pages) */
typedef __uint32_t __fixpt_t; /* fixed point number */
typedef __fixpt_t fixpt_t; /* fixed point number */
typedef __int32_t __lwpid_t; /* Thread ID (a.k.a. LWP) */
#ifndef _LWPID_T_DECLARED
typedef __lwpid_t lwpid_t; /* Thread ID (a.k.a. LWP) */
#define _LWPID_T_DECLARED
#endif
#endif /* !_COMPAT_SYS__TYPES_H_ */

View File

@ -0,0 +1,208 @@
/*-
* Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Copyright (c) 2008 Nokia Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_BITSET_H_
#define _SYS_BITSET_H_
#define __bitset_mask(_s, n) \
(1L << ((__bitset_words((_s)) == 1) ? \
(__size_t)(n) : ((n) % _BITSET_BITS)))
#define __bitset_word(_s, n) \
((__bitset_words((_s)) == 1) ? 0 : ((n) / _BITSET_BITS))
#define BIT_CLR(_s, n, p) \
((p)->__bits[__bitset_word(_s, n)] &= ~__bitset_mask((_s), (n)))
#define BIT_COPY(_s, f, t) (void)(*(t) = *(f))
#define BIT_ISSET(_s, n, p) \
((((p)->__bits[__bitset_word(_s, n)] & __bitset_mask((_s), (n))) != 0))
#define BIT_SET(_s, n, p) \
((p)->__bits[__bitset_word(_s, n)] |= __bitset_mask((_s), (n)))
#define BIT_ZERO(_s, p) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
(p)->__bits[__i] = 0L; \
} while (0)
#define BIT_FILL(_s, p) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
(p)->__bits[__i] = -1L; \
} while (0)
#define BIT_SETOF(_s, n, p) do { \
BIT_ZERO(_s, p); \
(p)->__bits[__bitset_word(_s, n)] = __bitset_mask((_s), (n)); \
} while (0)
/* Is p empty. */
#define BIT_EMPTY(_s, p) __extension__ ({ \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
if ((p)->__bits[__i]) \
break; \
__i == __bitset_words((_s)); \
})
/* Is p full set. */
#define BIT_ISFULLSET(_s, p) __extension__ ({ \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
if ((p)->__bits[__i] != (long)-1) \
break; \
__i == __bitset_words((_s)); \
})
/* Is c a subset of p. */
#define BIT_SUBSET(_s, p, c) __extension__ ({ \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
if (((c)->__bits[__i] & \
(p)->__bits[__i]) != \
(c)->__bits[__i]) \
break; \
__i == __bitset_words((_s)); \
})
/* Are there any common bits between b & c? */
#define BIT_OVERLAP(_s, p, c) __extension__ ({ \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
if (((c)->__bits[__i] & \
(p)->__bits[__i]) != 0) \
break; \
__i != __bitset_words((_s)); \
})
/* Compare two sets, returns 0 if equal 1 otherwise. */
#define BIT_CMP(_s, p, c) __extension__ ({ \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
if (((c)->__bits[__i] != \
(p)->__bits[__i])) \
break; \
__i != __bitset_words((_s)); \
})
#define BIT_OR(_s, d, s) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
(d)->__bits[__i] |= (s)->__bits[__i]; \
} while (0)
#define BIT_AND(_s, d, s) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
(d)->__bits[__i] &= (s)->__bits[__i]; \
} while (0)
#define BIT_NAND(_s, d, s) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
(d)->__bits[__i] &= ~(s)->__bits[__i]; \
} while (0)
#define BIT_CLR_ATOMIC(_s, n, p) \
atomic_clear_long(&(p)->__bits[__bitset_word(_s, n)], \
__bitset_mask((_s), n))
#define BIT_SET_ATOMIC(_s, n, p) \
atomic_set_long(&(p)->__bits[__bitset_word(_s, n)], \
__bitset_mask((_s), n))
#define BIT_SET_ATOMIC_ACQ(_s, n, p) \
atomic_set_acq_long(&(p)->__bits[__bitset_word(_s, n)], \
__bitset_mask((_s), n))
/* Convenience functions catering special cases. */
#define BIT_AND_ATOMIC(_s, d, s) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
atomic_clear_long(&(d)->__bits[__i], \
~(s)->__bits[__i]); \
} while (0)
#define BIT_OR_ATOMIC(_s, d, s) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
atomic_set_long(&(d)->__bits[__i], \
(s)->__bits[__i]); \
} while (0)
#define BIT_COPY_STORE_REL(_s, f, t) do { \
__size_t __i; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
atomic_store_rel_long(&(t)->__bits[__i], \
(f)->__bits[__i]); \
} while (0)
#define BIT_FFS(_s, p) __extension__ ({ \
__size_t __i; \
int __bit; \
\
__bit = 0; \
for (__i = 0; __i < __bitset_words((_s)); __i++) { \
if ((p)->__bits[__i] != 0) { \
__bit = ffsl((p)->__bits[__i]); \
__bit += __i * _BITSET_BITS; \
break; \
} \
} \
__bit; \
})
#define BIT_COUNT(_s, p) __extension__ ({ \
__size_t __i; \
int __count; \
\
__count = 0; \
for (__i = 0; __i < __bitset_words((_s)); __i++) \
__count += __bitcountl((p)->__bits[__i]); \
__count; \
})
#define BITSET_T_INITIALIZER(x) \
{ .__bits = { x } }
#define BITSET_FSET(n) \
[ 0 ... ((n) - 1) ] = (-1L)
/*
* Dynamically allocate a bitset.
*/
#define BITSET_ALLOC(_s, mt, mf) \
malloc(__bitset_words(_s) * sizeof(long), mt, (mf))
#endif /* !_SYS_BITSET_H_ */
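
A small, hedged example of the BIT_*() macros above; the set name and helper are hypothetical, and BIT_FFS() returns a 1-based index (0 when the set is empty):

```c
#include <sys/_bitset.h>
#include <sys/bitset.h>

#define EXAMPLE_SETSIZE	64
BITSET_DEFINE(example_set, EXAMPLE_SETSIZE);

/* Hypothetical helper: return the lowest member of the set, or -1. */
static int
example_first_member(void)
{
	struct example_set s;

	BIT_ZERO(EXAMPLE_SETSIZE, &s);		/* clear every bit */
	BIT_SET(EXAMPLE_SETSIZE, 3, &s);	/* add member 3 */
	if (BIT_EMPTY(EXAMPLE_SETSIZE, &s))
		return (-1);
	return (BIT_FFS(EXAMPLE_SETSIZE, &s) - 1);	/* -> 3 */
}
```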

View File

@ -0,0 +1,134 @@
/*-
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)callout.h 8.2 (Berkeley) 1/21/94
* $FreeBSD$
*/
#ifndef _SYS_CALLOUT_H_
#define _SYS_CALLOUT_H_
#include <sys/_callout.h>
#define CALLOUT_LOCAL_ALLOC 0x0001 /* was allocated from callfree */
#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define CALLOUT_MPSAFE 0x0008 /* deprecated */
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
#define C_PRELBITS 7
#define C_PRELRANGE ((1 << C_PRELBITS) - 1)
#define C_PREL(x) (((x) + 1) << 1)
#define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1)
#define C_HARDCLOCK 0x0100 /* align to hardclock() calls */
#define C_ABSOLUTE 0x0200 /* event time is absolute. */
struct callout_handle {
struct callout *callout;
};
/* Flags for callout_stop_safe() */
#define CS_DRAIN 0x0001 /* callout_drain(), wait allowed */
#define CS_EXECUTING 0x0002 /* Positive return value indicates that
the callout was executing */
#ifdef _KERNEL
/*
* Note that the flags field is actually *two* fields.  The c_flags field
* holds the flags touched by caller operations that may or may not hold a
* lock, e.g. callout_deactivate().  The other, c_iflags, holds the internal
* flags that *must* be kept consistent and on which the callout system
* depends, e.g. callout_pending().
* The c_iflags field is used internally by the callout system to determine
* which list the callout is on and to track internal state. Callers *should
* not* use the c_flags field directly but should use the macros provided.
*
* The c_iflags field holds internal flags that are protected by internal
* locks of the callout subsystem. The c_flags field holds external flags.
* The caller must hold its own lock while manipulating or reading external
* flags via callout_active(), callout_deactivate(), callout_reset*(), or
* callout_stop() to avoid races.
*/
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
#define callout_drain(c) _callout_stop_safe(c, CS_DRAIN, NULL)
void callout_init(struct callout *, int);
void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_mtx(c, mtx, flags) \
_callout_init_lock((c), ((mtx) != NULL) ? &(mtx)->lock_object : \
NULL, (flags))
#define callout_init_rm(c, rm, flags) \
_callout_init_lock((c), ((rm) != NULL) ? &(rm)->lock_object : \
NULL, (flags))
#define callout_init_rw(c, rw, flags) \
_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \
NULL, (flags))
#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING)
int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
void (*)(void *), void *, int, int);
#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid),\
(flags))
#define callout_reset_on(c, to_ticks, fn, arg, cpu) \
callout_reset_sbt_on((c), tick_sbt * (to_ticks), 0, (fn), (arg), \
(cpu), C_HARDCLOCK)
#define callout_reset(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), -1)
#define callout_reset_curcpu(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid))
#define callout_schedule_sbt_on(c, sbt, pr, cpu, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (c)->c_func, (c)->c_arg, \
(cpu), (flags))
#define callout_schedule_sbt(c, sbt, pr, flags) \
callout_schedule_sbt_on((c), (sbt), (pr), -1, (flags))
#define callout_schedule_sbt_curcpu(c, sbt, pr, flags) \
callout_schedule_sbt_on((c), (sbt), (pr), PCPU_GET(cpuid), (flags))
int callout_schedule(struct callout *, int);
int callout_schedule_on(struct callout *, int, int);
#define callout_schedule_curcpu(c, on_tick) \
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
#define callout_stop(c) _callout_stop_safe(c, 0, NULL)
int _callout_stop_safe(struct callout *, int, void (*)(void *));
void callout_process(sbintime_t now);
#define callout_async_drain(c, d) \
_callout_stop_safe(c, 0, d)
#endif
#endif /* _SYS_CALLOUT_H_ */
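
A hedged sketch of the common callout(9) pattern using the API above; the lock, handler, and helper names are invented, and the one-second period uses the hz-based callout_reset() form:

```c
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* hz */
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

static struct mtx example_mtx;
static struct callout example_co;

static void
example_timeout(void *arg)
{
	/* Runs with example_mtx held because of callout_init_mtx(). */
	callout_reset(&example_co, hz, example_timeout, arg);	/* rearm */
}

static void
example_start(void *arg)
{
	mtx_init(&example_mtx, "example", NULL, MTX_DEF);
	callout_init_mtx(&example_co, &example_mtx, 0);
	callout_reset(&example_co, hz, example_timeout, arg);
}
```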

View File

@ -0,0 +1,61 @@
/*-
* Copyright (c) 2013 FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_CAPRIGHTS_H_
#define _SYS_CAPRIGHTS_H_
/*
* The top two bits in the first element of the cr_rights[] array contain
* total number of elements in the array - 2. This means if those two bits are
* equal to 0, we have 2 array elements.
* The top two bits in all remaining array elements should be 0.
* The next five bits contain the array index. Only one of those bits is set,
* and its position within the five-bit range encodes the index. This means
* there can be at most five array elements.
*/
#define CAP_RIGHTS_VERSION_00 0
/*
#define CAP_RIGHTS_VERSION_01 1
#define CAP_RIGHTS_VERSION_02 2
#define CAP_RIGHTS_VERSION_03 3
*/
#define CAP_RIGHTS_VERSION CAP_RIGHTS_VERSION_00
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
#ifndef _CAP_RIGHTS_T_DECLARED
#define _CAP_RIGHTS_T_DECLARED
typedef struct cap_rights cap_rights_t;
#endif
#endif /* !_SYS_CAPRIGHTS_H_ */

View File

@ -0,0 +1,153 @@
/*-
* Copyright (c) 2008, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Copyright (c) 2008 Nokia Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_CPUSET_H_
#define _SYS_CPUSET_H_
#include <sys/_cpuset.h>
#include <sys/bitset.h>
#define _NCPUBITS _BITSET_BITS
#define _NCPUWORDS __bitset_words(CPU_SETSIZE)
#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS)
#define CPU_CLR(n, p) BIT_CLR(CPU_SETSIZE, n, p)
#define CPU_COPY(f, t) BIT_COPY(CPU_SETSIZE, f, t)
#define CPU_ISSET(n, p) BIT_ISSET(CPU_SETSIZE, n, p)
#define CPU_SET(n, p) BIT_SET(CPU_SETSIZE, n, p)
#define CPU_ZERO(p) BIT_ZERO(CPU_SETSIZE, p)
#define CPU_FILL(p) BIT_FILL(CPU_SETSIZE, p)
#define CPU_SETOF(n, p) BIT_SETOF(CPU_SETSIZE, n, p)
#define CPU_EMPTY(p) BIT_EMPTY(CPU_SETSIZE, p)
#define CPU_ISFULLSET(p) BIT_ISFULLSET(CPU_SETSIZE, p)
#define CPU_SUBSET(p, c) BIT_SUBSET(CPU_SETSIZE, p, c)
#define CPU_OVERLAP(p, c) BIT_OVERLAP(CPU_SETSIZE, p, c)
#define CPU_CMP(p, c) BIT_CMP(CPU_SETSIZE, p, c)
#define CPU_OR(d, s) BIT_OR(CPU_SETSIZE, d, s)
#define CPU_AND(d, s) BIT_AND(CPU_SETSIZE, d, s)
#define CPU_NAND(d, s) BIT_NAND(CPU_SETSIZE, d, s)
#define CPU_CLR_ATOMIC(n, p) BIT_CLR_ATOMIC(CPU_SETSIZE, n, p)
#define CPU_SET_ATOMIC(n, p) BIT_SET_ATOMIC(CPU_SETSIZE, n, p)
#define CPU_SET_ATOMIC_ACQ(n, p) BIT_SET_ATOMIC_ACQ(CPU_SETSIZE, n, p)
#define CPU_AND_ATOMIC(n, p) BIT_AND_ATOMIC(CPU_SETSIZE, n, p)
#define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
#define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
#define CPU_FFS(p) BIT_FFS(CPU_SETSIZE, p)
#define CPU_COUNT(p) BIT_COUNT(CPU_SETSIZE, p)
#define CPUSET_FSET BITSET_FSET(_NCPUWORDS)
#define CPUSET_T_INITIALIZER BITSET_T_INITIALIZER
/*
* Valid cpulevel_t values.
*/
#define CPU_LEVEL_ROOT 1 /* All system cpus. */
#define CPU_LEVEL_CPUSET 2 /* Available cpus for which. */
#define CPU_LEVEL_WHICH 3 /* Actual mask/id for which. */
/*
* Valid cpuwhich_t values.
*/
#define CPU_WHICH_TID 1 /* Specifies a thread id. */
#define CPU_WHICH_PID 2 /* Specifies a process id. */
#define CPU_WHICH_CPUSET 3 /* Specifies a set id. */
#define CPU_WHICH_IRQ 4 /* Specifies an irq #. */
#define CPU_WHICH_JAIL 5 /* Specifies a jail id. */
#define CPU_WHICH_DOMAIN 6 /* Specifies a NUMA domain id. */
/*
* Reserved cpuset identifiers.
*/
#define CPUSET_INVALID -1
#define CPUSET_DEFAULT 0
#ifdef _KERNEL
#include <sys/queue.h>
LIST_HEAD(setlist, cpuset);
/*
* cpusets encapsulate cpu binding information for one or more threads.
*
* a - Accessed with atomics.
* s - Set at creation, never modified. Only a ref required to read.
* c - Locked internally by a cpuset lock.
*
* The bitmask is only modified while holding the cpuset lock. It may be
* read while only a reference is held but the consumer must be prepared
* to deal with inconsistent results.
*/
struct cpuset {
cpuset_t cs_mask; /* bitmask of valid cpus. */
volatile u_int cs_ref; /* (a) Reference count. */
int cs_flags; /* (s) Flags from below. */
cpusetid_t cs_id; /* (s) Id or INVALID. */
struct cpuset *cs_parent; /* (s) Pointer to our parent. */
LIST_ENTRY(cpuset) cs_link; /* (c) All identified sets. */
LIST_ENTRY(cpuset) cs_siblings; /* (c) Sibling set link. */
struct setlist cs_children; /* (c) List of children. */
};
#define CPU_SET_ROOT 0x0001 /* Set is a root set. */
#define CPU_SET_RDONLY 0x0002 /* No modification allowed. */
extern cpuset_t *cpuset_root;
struct prison;
struct proc;
struct thread;
struct cpuset *cpuset_thread0(void);
struct cpuset *cpuset_ref(struct cpuset *);
void cpuset_rel(struct cpuset *);
int cpuset_setthread(lwpid_t id, cpuset_t *);
int cpuset_setithread(lwpid_t id, int cpu);
int cpuset_create_root(struct prison *, struct cpuset **);
int cpuset_setproc_update_set(struct proc *, struct cpuset *);
int cpuset_which(cpuwhich_t, id_t, struct proc **,
struct thread **, struct cpuset **);
char *cpusetobj_strprint(char *, const cpuset_t *);
int cpusetobj_strscan(cpuset_t *, const char *);
#ifdef DDB
void ddb_display_cpuset(const cpuset_t *);
#endif
#else
__BEGIN_DECLS
int cpuset(cpusetid_t *);
int cpuset_setid(cpuwhich_t, id_t, cpusetid_t);
int cpuset_getid(cpulevel_t, cpuwhich_t, id_t, cpusetid_t *);
int cpuset_getaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, cpuset_t *);
int cpuset_setaffinity(cpulevel_t, cpuwhich_t, id_t, size_t, const cpuset_t *);
__END_DECLS
#endif
#endif /* !_SYS_CPUSET_H_ */
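
A hedged userspace sketch of the CPU_*() macros and the cpuset_setaffinity() prototype declared above; the helper name is invented and error handling is trimmed:

```c
#include <sys/param.h>
#include <sys/cpuset.h>

/* Illustrative only: restrict the calling thread to CPU 0. */
static int
example_pin_to_cpu0(void)
{
	cpuset_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);
	/* id -1 with CPU_WHICH_TID means "the calling thread". */
	return (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
	    sizeof(mask), &mask));
}
```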

View File

@ -0,0 +1,100 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)domain.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _SYS_DOMAIN_H_
#define _SYS_DOMAIN_H_
/*
* Structure per communications domain.
*/
/*
* Forward structure declarations for function prototypes [sic].
*/
struct mbuf;
struct ifnet;
struct socket;
struct domain {
int dom_family; /* AF_xxx */
char *dom_name;
void (*dom_init) /* initialize domain data structures */
(void);
void (*dom_destroy) /* cleanup structures / state */
(void);
int (*dom_externalize) /* externalize access rights */
(struct mbuf *, struct mbuf **, int);
void (*dom_dispose) /* dispose of internalized rights */
(struct socket *);
struct protosw *dom_protosw, *dom_protoswNPROTOSW;
struct domain *dom_next;
int (*dom_rtattach) /* initialize routing table */
(void **, int);
int (*dom_rtdetach) /* clean up routing table */
(void **, int);
void *(*dom_ifattach)(struct ifnet *);
void (*dom_ifdetach)(struct ifnet *, void *);
int (*dom_ifmtu)(struct ifnet *);
/* af-dependent data on ifnet */
};
#ifdef _KERNEL
extern int domain_init_status;
extern struct domain *domains;
void domain_add(void *);
void domain_init(void *);
#ifdef VIMAGE
void vnet_domain_init(void *);
void vnet_domain_uninit(void *);
#endif
#define DOMAIN_SET(name) \
SYSINIT(domain_add_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_FIRST, domain_add, & name ## domain); \
SYSINIT(domain_init_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_SECOND, domain_init, & name ## domain);
#ifdef VIMAGE
#define VNET_DOMAIN_SET(name) \
SYSINIT(domain_add_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_FIRST, domain_add, & name ## domain); \
VNET_SYSINIT(vnet_domain_init_ ## name, SI_SUB_PROTO_DOMAIN, \
SI_ORDER_SECOND, vnet_domain_init, & name ## domain); \
VNET_SYSUNINIT(vnet_domain_uninit_ ## name, \
SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, vnet_domain_uninit, \
& name ## domain)
#else /* !VIMAGE */
#define VNET_DOMAIN_SET(name) DOMAIN_SET(name)
#endif /* VIMAGE */
#endif /* _KERNEL */
#endif /* !_SYS_DOMAIN_H_ */

View File

@ -0,0 +1,299 @@
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_EVENT_H_
#define _SYS_EVENT_H_
#include <sys/queue.h>
#define EVFILT_READ (-1)
#define EVFILT_WRITE (-2)
#define EVFILT_AIO (-3) /* attached to aio requests */
#define EVFILT_VNODE (-4) /* attached to vnodes */
#define EVFILT_PROC (-5) /* attached to struct proc */
#define EVFILT_SIGNAL (-6) /* attached to struct proc */
#define EVFILT_TIMER (-7) /* timers */
#define EVFILT_PROCDESC (-8) /* attached to process descriptors */
#define EVFILT_FS (-9) /* filesystem events */
#define EVFILT_LIO (-10) /* attached to lio requests */
#define EVFILT_USER (-11) /* User events */
#define EVFILT_SENDFILE (-12) /* attached to sendfile requests */
#define EVFILT_SYSCOUNT 12
#define EV_SET(kevp_, a, b, c, d, e, f) do { \
struct kevent *kevp = (kevp_); \
(kevp)->ident = (a); \
(kevp)->filter = (b); \
(kevp)->flags = (c); \
(kevp)->fflags = (d); \
(kevp)->data = (e); \
(kevp)->udata = (f); \
} while(0)
struct kevent {
uintptr_t ident; /* identifier for this event */
short filter; /* filter for event */
u_short flags;
u_int fflags;
intptr_t data;
void *udata; /* opaque user data identifier */
};
/* actions */
#define EV_ADD 0x0001 /* add event to kq (implies enable) */
#define EV_DELETE 0x0002 /* delete event from kq */
#define EV_ENABLE 0x0004 /* enable event */
#define EV_DISABLE 0x0008 /* disable event (not reported) */
#define EV_FORCEONESHOT 0x0100 /* enable _ONESHOT and force trigger */
/* flags */
#define EV_ONESHOT 0x0010 /* only report one occurrence */
#define EV_CLEAR 0x0020 /* clear event state after reporting */
#define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data=0 */
#define EV_DISPATCH 0x0080 /* disable event after reporting */
#define EV_SYSFLAGS 0xF000 /* reserved by system */
#define EV_DROP 0x1000 /* note should be dropped */
#define EV_FLAG1 0x2000 /* filter-specific flag */
#define EV_FLAG2 0x4000 /* filter-specific flag */
/* returned values */
#define EV_EOF 0x8000 /* EOF detected */
#define EV_ERROR 0x4000 /* error, data contains errno */
/*
* data/hint flags/masks for EVFILT_USER, shared with userspace
*
* On input, the top two bits of fflags specifies how the lower twenty four
* bits should be applied to the stored value of fflags.
*
* On output, the top two bits will always be set to NOTE_FFNOP and the
* remaining twenty four bits will contain the stored fflags value.
*/
#define NOTE_FFNOP 0x00000000 /* ignore input fflags */
#define NOTE_FFAND 0x40000000 /* AND fflags */
#define NOTE_FFOR 0x80000000 /* OR fflags */
#define NOTE_FFCOPY 0xc0000000 /* copy fflags */
#define NOTE_FFCTRLMASK 0xc0000000 /* masks for operations */
#define NOTE_FFLAGSMASK 0x00ffffff
#define NOTE_TRIGGER 0x01000000 /* Cause the event to be
triggered for output. */
/*
* data/hint flags for EVFILT_{READ|WRITE}, shared with userspace
*/
#define NOTE_LOWAT 0x0001 /* low water mark */
#define NOTE_FILE_POLL 0x0002 /* behave like poll() */
/*
* data/hint flags for EVFILT_VNODE, shared with userspace
*/
#define NOTE_DELETE 0x0001 /* vnode was removed */
#define NOTE_WRITE 0x0002 /* data contents changed */
#define NOTE_EXTEND 0x0004 /* size increased */
#define NOTE_ATTRIB 0x0008 /* attributes changed */
#define NOTE_LINK 0x0010 /* link count changed */
#define NOTE_RENAME 0x0020 /* vnode was renamed */
#define NOTE_REVOKE 0x0040 /* vnode access was revoked */
#define NOTE_OPEN 0x0080 /* vnode was opened */
#define NOTE_CLOSE 0x0100 /* file closed, fd did not
allow write */
#define NOTE_CLOSE_WRITE 0x0200 /* file closed, fd did allow
write */
#define NOTE_READ 0x0400 /* file was read */
/*
* data/hint flags for EVFILT_PROC and EVFILT_PROCDESC, shared with userspace
*/
#define NOTE_EXIT 0x80000000 /* process exited */
#define NOTE_FORK 0x40000000 /* process forked */
#define NOTE_EXEC 0x20000000 /* process exec'd */
#define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */
#define NOTE_PDATAMASK 0x000fffff /* mask for pid */
/* additional flags for EVFILT_PROC */
#define NOTE_TRACK 0x00000001 /* follow across forks */
#define NOTE_TRACKERR 0x00000002 /* could not track child */
#define NOTE_CHILD 0x00000004 /* am a child process */
/* additional flags for EVFILT_TIMER */
#define NOTE_SECONDS 0x00000001 /* data is seconds */
#define NOTE_MSECONDS 0x00000002 /* data is milliseconds */
#define NOTE_USECONDS 0x00000004 /* data is microseconds */
#define NOTE_NSECONDS 0x00000008 /* data is nanoseconds */
struct knote;
SLIST_HEAD(klist, knote);
struct kqueue;
TAILQ_HEAD(kqlist, kqueue);
struct knlist {
struct klist kl_list;
void (*kl_lock)(void *); /* lock function */
void (*kl_unlock)(void *);
void (*kl_assert_locked)(void *);
void (*kl_assert_unlocked)(void *);
void *kl_lockarg; /* argument passed to lock functions */
int kl_autodestroy;
};
#ifdef _KERNEL
/*
* Flags for knote call
*/
#define KNF_LISTLOCKED 0x0001 /* knlist is locked */
#define KNF_NOKQLOCK 0x0002 /* do not keep KQ_LOCK */
#define KNOTE(list, hist, flags) knote(list, hist, flags)
#define KNOTE_LOCKED(list, hint) knote(list, hint, KNF_LISTLOCKED)
#define KNOTE_UNLOCKED(list, hint) knote(list, hint, 0)
#define KNLIST_EMPTY(list) SLIST_EMPTY(&(list)->kl_list)
/*
* Flag indicating hint is a signal. Used by EVFILT_SIGNAL, and also
* shared by EVFILT_PROC (all knotes attached to p->p_klist)
*/
#define NOTE_SIGNAL 0x08000000
/*
* Hint values for the optional f_touch event filter. If f_touch is not set
* to NULL and f_isfd is zero the f_touch filter will be called with the type
* argument set to EVENT_REGISTER during a kevent() system call. It is also
* called under the same conditions with the type argument set to EVENT_PROCESS
* when the event has been triggered.
*/
#define EVENT_REGISTER 1
#define EVENT_PROCESS 2
struct filterops {
int f_isfd; /* true if ident == filedescriptor */
int (*f_attach)(struct knote *kn);
void (*f_detach)(struct knote *kn);
int (*f_event)(struct knote *kn, long hint);
void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type);
};
/*
* Setting the KN_INFLUX flag enables you to unlock the kq that this knote
* is on, and modify kn_status as if you had the KQ lock.
*
* kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock.
*/
struct knote {
SLIST_ENTRY(knote) kn_link; /* for kq */
SLIST_ENTRY(knote) kn_selnext; /* for struct selinfo */
struct knlist *kn_knlist; /* f_attach populated */
TAILQ_ENTRY(knote) kn_tqe;
struct kqueue *kn_kq; /* which queue we are on */
struct kevent kn_kevent;
int kn_status; /* protected by kq lock */
#define KN_ACTIVE 0x01 /* event has been triggered */
#define KN_QUEUED 0x02 /* event is on queue */
#define KN_DISABLED 0x04 /* event is disabled */
#define KN_DETACHED 0x08 /* knote is detached */
#define KN_INFLUX 0x10 /* knote is in flux */
#define KN_MARKER 0x20 /* ignore this knote */
#define KN_KQUEUE 0x40 /* this knote belongs to a kq */
#define KN_HASKQLOCK 0x80 /* for _inevent */
#define KN_SCAN 0x100 /* flux set in kqueue_scan() */
int kn_sfflags; /* saved filter flags */
intptr_t kn_sdata; /* saved data field */
union {
struct file *p_fp; /* file data pointer */
struct proc *p_proc; /* proc pointer */
struct kaiocb *p_aio; /* AIO job pointer */
struct aioliojob *p_lio; /* LIO job pointer */
sbintime_t *p_nexttime; /* next timer event fires at */
void *p_v; /* generic other pointer */
} kn_ptr;
struct filterops *kn_fop;
void *kn_hook;
int kn_hookid;
#define kn_id kn_kevent.ident
#define kn_filter kn_kevent.filter
#define kn_flags kn_kevent.flags
#define kn_fflags kn_kevent.fflags
#define kn_data kn_kevent.data
#define kn_fp kn_ptr.p_fp
};
struct kevent_copyops {
void *arg;
int (*k_copyout)(void *arg, struct kevent *kevp, int count);
int (*k_copyin)(void *arg, struct kevent *kevp, int count);
};
struct thread;
struct proc;
struct knlist;
struct mtx;
struct rwlock;
extern void knote(struct knlist *list, long hint, int lockflags);
extern void knote_fork(struct knlist *list, int pid);
extern struct knlist *knlist_alloc(struct mtx *lock);
extern void knlist_detach(struct knlist *knl);
extern void knlist_add(struct knlist *knl, struct knote *kn, int islocked);
extern void knlist_remove(struct knlist *knl, struct knote *kn, int islocked);
extern int knlist_empty(struct knlist *knl);
extern void knlist_init(struct knlist *knl, void *lock,
void (*kl_lock)(void *), void (*kl_unlock)(void *),
void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *));
extern void knlist_init_mtx(struct knlist *knl, struct mtx *lock);
extern void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock);
extern void knlist_destroy(struct knlist *knl);
extern void knlist_cleardel(struct knlist *knl, struct thread *td,
int islocked, int killkn);
#define knlist_clear(knl, islocked) \
knlist_cleardel((knl), NULL, (islocked), 0)
#define knlist_delete(knl, td, islocked) \
knlist_cleardel((knl), (td), (islocked), 1)
extern void knote_fdclose(struct thread *p, int fd);
extern int kqfd_register(int fd, struct kevent *kev, struct thread *p,
int waitok);
extern int kqueue_add_filteropts(int filt, struct filterops *filtops);
extern int kqueue_del_filteropts(int filt);
#else /* !_KERNEL */
#include <sys/cdefs.h>
struct timespec;
__BEGIN_DECLS
int kqueue(void);
int kevent(int kq, const struct kevent *changelist, int nchanges,
struct kevent *eventlist, int nevents,
const struct timespec *timeout);
__END_DECLS
#endif /* !_KERNEL */
#endif /* !_SYS_EVENT_H_ */
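
A hedged userspace sketch of EV_SET() and the kevent() prototype above, watching a descriptor for readability; the helper name is invented and error handling is abbreviated:

```c
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <unistd.h>

/* Illustrative only: block until 'fd' becomes readable, using kqueue. */
static int
example_wait_readable(int fd)
{
	struct kevent change, event;
	int kq, n;

	kq = kqueue();
	if (kq == -1)
		return (-1);
	EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
	n = kevent(kq, &change, 1, &event, 1, NULL);	/* NULL: wait forever */
	close(kq);
	return (n == 1 && !(event.flags & EV_ERROR) ? 0 : -1);
}
```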

View File

@ -0,0 +1,196 @@
/*-
* Copyright (c) 1987, 1993
* The Regents of the University of California.
* Copyright (c) 2005, 2009 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)malloc.h 8.5 (Berkeley) 5/3/95
* $FreeBSD$
*/
#ifndef _SYS_MALLOC_H_
#define _SYS_MALLOC_H_
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#define MINALLOCSIZE UMA_SMALLEST_UNIT
/*
* flags to malloc.
*/
#define M_NOWAIT 0x0001 /* do not block */
#define M_WAITOK 0x0002 /* ok to block */
#define M_ZERO 0x0100 /* bzero the allocation */
#define M_NOVM 0x0200 /* don't ask VM for pages */
#define M_USE_RESERVE 0x0400 /* can alloc out of reserve memory */
#define M_NODUMP 0x0800 /* don't dump pages in this allocation */
#define M_FIRSTFIT 0x1000 /* Only for vmem, fast fit. */
#define M_BESTFIT 0x2000 /* Only for vmem, low fragmentation. */
#define M_MAGIC 877983977 /* time when first defined :-) */
/*
* Two malloc type structures are present: malloc_type, which is used by a
* type owner to declare the type, and malloc_type_internal, which holds
* malloc-owned statistics and other ABI-sensitive fields, such as the set of
* malloc statistics indexed by the compile-time MAXCPU constant.
* Applications should avoid introducing dependence on the allocator private
* data layout and size.
*
* The malloc_type ks_next field is protected by malloc_mtx. Other fields in
* malloc_type are static after initialization so unsynchronized.
*
* Statistics in malloc_type_stats are written only when holding a critical
* section and running on the CPU associated with the index into the stat
* array, but read lock-free resulting in possible (minor) races, which the
* monitoring app should take into account.
*/
struct malloc_type_stats {
uint64_t mts_memalloced; /* Bytes allocated on CPU. */
uint64_t mts_memfreed; /* Bytes freed on CPU. */
uint64_t mts_numallocs; /* Number of allocates on CPU. */
uint64_t mts_numfrees; /* number of frees on CPU. */
uint64_t mts_size; /* Bitmask of sizes allocated on CPU. */
uint64_t _mts_reserved1; /* Reserved field. */
uint64_t _mts_reserved2; /* Reserved field. */
uint64_t _mts_reserved3; /* Reserved field. */
};
/*
* Index definitions for the mti_probes[] array.
*/
#define DTMALLOC_PROBE_MALLOC 0
#define DTMALLOC_PROBE_FREE 1
#define DTMALLOC_PROBE_MAX 2
#ifndef MAXCPU
#define MAXCPU 1
#endif
struct malloc_type_internal {
uint32_t mti_probes[DTMALLOC_PROBE_MAX];
/* DTrace probe ID array. */
u_char mti_zone;
struct malloc_type_stats mti_stats[MAXCPU];
};
/*
* Public data structure describing a malloc type. Private data is hung off
* of ks_handle to avoid encoding internal malloc(9) data structures in
* modules, which will statically allocate struct malloc_type.
*/
struct malloc_type {
struct malloc_type *ks_next; /* Next in global chain. */
u_long ks_magic; /* Detect programmer error. */
const char *ks_shortdesc; /* Printable type name. */
void *ks_handle; /* Priv. data, was lo_class. */
};
/*
* Statistics structure headers for user space. The kern.malloc sysctl
* exposes a structure stream consisting of a stream header, then a series of
* malloc type headers and statistics structures (quantity maxcpus). For
* convenience, the kernel will provide the current value of maxcpus at the
* head of the stream.
*/
#define MALLOC_TYPE_STREAM_VERSION 0x00000001
struct malloc_type_stream_header {
uint32_t mtsh_version; /* Stream format version. */
uint32_t mtsh_maxcpus; /* Value of MAXCPU for stream. */
uint32_t mtsh_count; /* Number of records. */
uint32_t _mtsh_pad; /* Pad/reserved field. */
};
#define MALLOC_MAX_NAME 32
struct malloc_type_header {
char mth_name[MALLOC_MAX_NAME];
};
#ifdef _KERNEL
#define MALLOC_DEFINE(type, shortdesc, longdesc) \
struct malloc_type type[1] = { \
{ NULL, M_MAGIC, shortdesc, NULL } \
}; \
SYSINIT(type##_init, SI_SUB_KMEM, SI_ORDER_THIRD, malloc_init, \
type); \
SYSUNINIT(type##_uninit, SI_SUB_KMEM, SI_ORDER_ANY, \
malloc_uninit, type)
#define MALLOC_DECLARE(type) \
extern struct malloc_type type[1]
MALLOC_DECLARE(M_CACHE);
MALLOC_DECLARE(M_DEVBUF);
MALLOC_DECLARE(M_TEMP);
/*
* Deprecated macro versions of not-quite-malloc() and free().
*/
#define MALLOC(space, cast, size, type, flags) \
((space) = (cast)malloc((u_long)(size), (type), (flags)))
#define FREE(addr, type) free((addr), (type))
/*
* XXX this should be declared in <sys/uio.h>, but that tends to fail
* because <sys/uio.h> is included in a header before the source file
* has a chance to include <sys/malloc.h> to get MALLOC_DECLARE() defined.
*/
MALLOC_DECLARE(M_IOV);
extern struct mtx malloc_mtx;
/*
* Function type used when iterating over the list of malloc types.
*/
typedef void malloc_type_list_func_t(struct malloc_type *, void *);
void contigfree(void *addr, unsigned long size, struct malloc_type *type);
void *contigmalloc(unsigned long size, struct malloc_type *type, int flags,
vm_paddr_t low, vm_paddr_t high, unsigned long alignment,
vm_paddr_t boundary) __malloc_like __result_use_check
__alloc_size(1) __alloc_align(6);
void free(void *addr, struct malloc_type *type);
void *malloc(unsigned long size, struct malloc_type *type, int flags)
__malloc_like __result_use_check __alloc_size(1);
void malloc_init(void *);
int malloc_last_fail(void);
void malloc_type_allocated(struct malloc_type *type, unsigned long size);
void malloc_type_freed(struct malloc_type *type, unsigned long size);
void malloc_type_list(malloc_type_list_func_t *, void *);
void malloc_uninit(void *);
void *realloc(void *addr, unsigned long size, struct malloc_type *type,
int flags) __result_use_check __alloc_size(2);
void *reallocf(void *addr, unsigned long size, struct malloc_type *type,
int flags) __alloc_size(2);
struct malloc_type *malloc_desc2type(const char *desc);
#endif /* _KERNEL */
#endif /* !_SYS_MALLOC_H_ */
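
The stream layout described in the header above (one malloc_type_stream_header, then per type a malloc_type_header followed by mtsh_maxcpus copies of malloc_type_stats) is roughly what libmemstat consumes. A minimal userland sketch of that walk follows; the sysctl name "kern.malloc_stats" and the error handling are assumptions for illustration, and real consumers should use libmemstat instead.

```
/* Hypothetical reader for the kernel malloc(9) binary statistics stream. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	struct malloc_type_stream_header *hdr;
	char *buf, *p;
	size_t len = 0;
	uint32_t i, cpu;

	/* First call sizes the buffer, second call fetches the stream. */
	if (sysctlbyname("kern.malloc_stats", NULL, &len, NULL, 0) != 0)
		return (1);
	if ((buf = malloc(len)) == NULL ||
	    sysctlbyname("kern.malloc_stats", buf, &len, NULL, 0) != 0)
		return (1);

	hdr = (struct malloc_type_stream_header *)buf;
	if (hdr->mtsh_version != MALLOC_TYPE_STREAM_VERSION)
		return (1);

	p = buf + sizeof(*hdr);
	for (i = 0; i < hdr->mtsh_count; i++) {
		struct malloc_type_header *mth = (struct malloc_type_header *)p;
		uint64_t allocs = 0;

		p += sizeof(*mth);
		/* One malloc_type_stats record per possible CPU follows. */
		for (cpu = 0; cpu < hdr->mtsh_maxcpus; cpu++) {
			allocs += ((struct malloc_type_stats *)p)->mts_numallocs;
			p += sizeof(struct malloc_type_stats);
		}
		printf("%-32.32s %ju allocs\n", mth->mth_name, (uintmax_t)allocs);
	}
	free(buf);
	return (0);
}
```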

File diff suppressed because it is too large

View File

@ -0,0 +1,111 @@
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)nlist.h 8.2 (Berkeley) 1/21/94
*
* $FreeBSD$
*/
#ifndef _SYS_NLIST_AOUT_H_
#define _SYS_NLIST_AOUT_H_
/*
* Symbol table entries in a.out files.
*/
/*
* Layout of each symbol. The "#ifdef _AOUT_INCLUDE_" is so that
* programs including nlist.h can initialize nlist structures
* statically.
*/
struct nlist {
#ifdef _AOUT_INCLUDE_
union {
const char *n_name; /* symbol name (in memory) */
long n_strx; /* file string table offset (on disk) */
} n_un;
#else
const char *n_name; /* symbol name (in memory) */
#endif
unsigned char n_type; /* type defines */
char n_other; /* ".type" and binding information */
short n_desc; /* used by stab entries */
unsigned long n_value; /* address/value of the symbol */
};
#define n_hash n_desc /* used internally by ld(1); XXX */
/*
* Defines for n_type.
*/
#define N_UNDF 0x00 /* undefined */
#define N_ABS 0x02 /* absolute address */
#define N_TEXT 0x04 /* text segment */
#define N_DATA 0x06 /* data segment */
#define N_BSS 0x08 /* bss segment */
#define N_INDR 0x0a /* alias definition */
#define N_SIZE 0x0c /* pseudo type, defines a symbol's size */
#define N_COMM 0x12 /* common reference */
/* GNU extensions */
#define N_SETA 0x14 /* Absolute set element symbol */
#define N_SETT 0x16 /* Text set element symbol */
#define N_SETD 0x18 /* Data set element symbol */
#define N_SETB 0x1a /* Bss set element symbol */
#define N_SETV 0x1c /* Pointer to set vector in data area. */
/* end GNU extensions */
#define N_FN 0x1e /* file name (N_EXT on) */
#define N_WARN 0x1e /* warning message (N_EXT off) */
#define N_EXT 0x01 /* external (global) bit, OR'ed in */
#define N_TYPE 0x1e /* mask for all the type bits */
#define N_STAB 0xe0 /* mask for debugger symbols -- stab(5) */
/*
* Defines for n_other. It contains the ".type" (AUX) field in the least
* significant 4 bits, and the binding (for weak symbols) in the most
* significant 4 bits.
*/
#define N_AUX(p) ((p)->n_other & 0xf)
#define N_BIND(p) (((unsigned int)(p)->n_other >> 4) & 0xf)
#define N_OTHER(r, v) (((unsigned int)(r) << 4) | ((v) & 0xf))
#define AUX_OBJECT 1 /* data object */
#define AUX_FUNC 2 /* function */
/*#define BIND_LOCAL 0 not used */
/*#define BIND_GLOBAL 1 not used */
#define BIND_WEAK 2 /* weak binding */
#define N_FORMAT "%08x" /* namelist value format; XXX */
#endif /* !_SYS_NLIST_AOUT_H_ */
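
Because the n_type and n_other encodings above pack several fields into single bytes, a tiny illustrative decoder can help; the symbol entry below is fabricated and the program only exercises the masks and accessor macros defined in this header.

```
#include <sys/types.h>
#include <sys/nlist_aout.h>

#include <stdio.h>

int
main(void)
{
	/* Fabricated a.out symbol: a weak global function in the text segment. */
	struct nlist n = {
		.n_name = "_example",
		.n_type = N_TEXT | N_EXT,
		.n_other = N_OTHER(BIND_WEAK, AUX_FUNC),
	};

	printf("%s: %s %s symbol, aux=%d bind=%d\n", n.n_name,
	    (n.n_type & N_EXT) ? "global" : "local",
	    ((n.n_type & N_TYPE) == N_TEXT) ? "text" : "non-text",
	    N_AUX(&n), N_BIND(&n));
	return (0);
}
```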

View File

@ -0,0 +1,110 @@
/*-
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_OSD_H_
#define _SYS_OSD_H_
#include <sys/queue.h>
/*
* Lock key:
* (c) container lock (e.g. jail's pr_mtx) and/or osd_object_lock
* (l) osd_list_lock
*/
struct osd {
u_int osd_nslots; /* (c) */
void **osd_slots; /* (c) */
LIST_ENTRY(osd) osd_next; /* (l) */
};
#ifdef _KERNEL
#define OSD_THREAD 0
#define OSD_JAIL 1
#define OSD_KHELP 2
#define OSD_FIRST OSD_THREAD
#define OSD_LAST OSD_KHELP
typedef void (*osd_destructor_t)(void *value);
typedef int (*osd_method_t)(void *obj, void *data);
int osd_register(u_int type, osd_destructor_t destructor,
osd_method_t *methods);
void osd_deregister(u_int type, u_int slot);
int osd_set(u_int type, struct osd *osd, u_int slot, void *value);
void **osd_reserve(u_int slot);
int osd_set_reserved(u_int type, struct osd *osd, u_int slot, void **rsv,
void *value);
void osd_free_reserved(void **rsv);
void *osd_get(u_int type, struct osd *osd, u_int slot);
void osd_del(u_int type, struct osd *osd, u_int slot);
int osd_call(u_int type, u_int method, void *obj, void *data);
void osd_exit(u_int type, struct osd *osd);
#define osd_thread_register(destructor) \
osd_register(OSD_THREAD, (destructor), NULL)
#define osd_thread_deregister(slot) \
osd_deregister(OSD_THREAD, (slot))
#define osd_thread_set(td, slot, value) \
osd_set(OSD_THREAD, &(td)->td_osd, (slot), (value))
#define osd_thread_set_reserved(td, slot, rsv, value) \
osd_set_reserved(OSD_THREAD, &(td)->td_osd, (slot), (rsv), (value))
#define osd_thread_get(td, slot) \
osd_get(OSD_THREAD, &(td)->td_osd, (slot))
#define osd_thread_del(td, slot) do { \
KASSERT((td) == curthread, ("Not curthread.")); \
osd_del(OSD_THREAD, &(td)->td_osd, (slot)); \
} while (0)
#define osd_thread_call(td, method, data) \
osd_call(OSD_THREAD, (method), (td), (data))
#define osd_thread_exit(td) \
osd_exit(OSD_THREAD, &(td)->td_osd)
#define osd_jail_register(destructor, methods) \
osd_register(OSD_JAIL, (destructor), (methods))
#define osd_jail_deregister(slot) \
osd_deregister(OSD_JAIL, (slot))
#define osd_jail_set(pr, slot, value) \
osd_set(OSD_JAIL, &(pr)->pr_osd, (slot), (value))
#define osd_jail_set_reserved(pr, slot, rsv, value) \
osd_set_reserved(OSD_JAIL, &(pr)->pr_osd, (slot), (rsv), (value))
#define osd_jail_get(pr, slot) \
osd_get(OSD_JAIL, &(pr)->pr_osd, (slot))
#define osd_jail_del(pr, slot) \
osd_del(OSD_JAIL, &(pr)->pr_osd, (slot))
#define osd_jail_call(pr, method, data) \
osd_call(OSD_JAIL, (method), (pr), (data))
#define osd_jail_exit(pr) \
osd_exit(OSD_JAIL, &(pr)->pr_osd)
#endif /* _KERNEL */
#endif /* !_SYS_OSD_H_ */
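
As a concrete illustration of the slot API above, the kernel-context sketch below registers a thread OSD slot and stores and retrieves a private pointer through the osd_thread_* convenience macros. The my_* names and the use of M_TEMP are invented for the example; this is not code from the commit.

```
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/osd.h>

static u_int my_osd_slot;

/* Called when the slot is torn down, e.g. at thread exit. */
static void
my_osd_dtor(void *value)
{
	free(value, M_TEMP);		/* kernel free(9), not libc free */
}

static void
my_osd_init(void)
{
	my_osd_slot = osd_thread_register(my_osd_dtor);
}

static int
my_osd_attach(struct thread *td, void *data)
{
	/* osd_set() grows td->td_osd on demand; returns ENOMEM on failure. */
	return (osd_thread_set(td, my_osd_slot, data));
}

static void *
my_osd_lookup(struct thread *td)
{
	return (osd_thread_get(td, my_osd_slot));
}
```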

View File

@ -0,0 +1,133 @@
/*-
* Copyright (c) 1994, Henrik Vestergaard Draboel
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Henrik Vestergaard Draboel.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_PRIORITY_H_
#define _SYS_PRIORITY_H_
/*
* Process priority specifications.
*/
/*
* Priority classes.
*/
#define PRI_ITHD 1 /* Interrupt thread. */
#define PRI_REALTIME 2 /* Real time process. */
#define PRI_TIMESHARE 3 /* Time sharing process. */
#define PRI_IDLE 4 /* Idle process. */
/*
* PRI_FIFO is POSIX.1B SCHED_FIFO.
*/
#define PRI_FIFO_BIT 8
#define PRI_FIFO (PRI_FIFO_BIT | PRI_REALTIME)
#define PRI_BASE(P) ((P) & ~PRI_FIFO_BIT)
#define PRI_IS_REALTIME(P) (PRI_BASE(P) == PRI_REALTIME)
#define PRI_NEED_RR(P) ((P) != PRI_FIFO)
/*
* Priorities. Note that with 64 run queues, differences less than 4 are
* insignificant.
*/
/*
* Priorities range from 0 to 255, but differences of less than 4 (RQ_PPQ)
* are insignificant. Ranges are as follows:
*
* Interrupt threads: 0 - 47
* Realtime user threads: 48 - 79
* Top half kernel threads: 80 - 119
* Time sharing user threads: 120 - 223
* Idle user threads: 224 - 255
*
* XXX If/When the specific interrupt thread and top half thread ranges
* disappear, a larger range can be used for user processes.
*/
#define PRI_MIN (0) /* Highest priority. */
#define PRI_MAX (255) /* Lowest priority. */
#define PRI_MIN_ITHD (PRI_MIN)
#define PRI_MAX_ITHD (PRI_MIN_REALTIME - 1)
#define PI_REALTIME (PRI_MIN_ITHD + 0)
#define PI_AV (PRI_MIN_ITHD + 4)
#define PI_NET (PRI_MIN_ITHD + 8)
#define PI_DISK (PRI_MIN_ITHD + 12)
#define PI_TTY (PRI_MIN_ITHD + 16)
#define PI_DULL (PRI_MIN_ITHD + 20)
#define PI_SOFT (PRI_MIN_ITHD + 24)
#define PI_SWI(x) (PI_SOFT + (x) * RQ_PPQ)
#define PRI_MIN_REALTIME (48)
#define PRI_MAX_REALTIME (PRI_MIN_KERN - 1)
#define PRI_MIN_KERN (80)
#define PRI_MAX_KERN (PRI_MIN_TIMESHARE - 1)
#define PSWP (PRI_MIN_KERN + 0)
#define PVM (PRI_MIN_KERN + 4)
#define PINOD (PRI_MIN_KERN + 8)
#define PRIBIO (PRI_MIN_KERN + 12)
#define PVFS (PRI_MIN_KERN + 16)
#define PZERO (PRI_MIN_KERN + 20)
#define PSOCK (PRI_MIN_KERN + 24)
#define PWAIT (PRI_MIN_KERN + 28)
#define PLOCK (PRI_MIN_KERN + 32)
#define PPAUSE (PRI_MIN_KERN + 36)
#define PRI_MIN_TIMESHARE (120)
#define PRI_MAX_TIMESHARE (PRI_MIN_IDLE - 1)
#define PUSER (PRI_MIN_TIMESHARE)
#define PRI_MIN_IDLE (224)
#define PRI_MAX_IDLE (PRI_MAX)
#ifdef _KERNEL
/* Other arguments for kern_yield(9). */
#define PRI_USER -2 /* Change to current user priority. */
#define PRI_UNCHANGED -1 /* Do not change priority. */
#endif
struct priority {
u_char pri_class; /* Scheduling class. */
u_char pri_level; /* Normal priority level. */
u_char pri_native; /* Priority before propagation. */
u_char pri_user; /* User priority based on p_cpu and p_nice. */
};
#endif /* !_SYS_PRIORITY_H_ */
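
The range table in the comment above maps directly onto the PRI_MIN_* boundaries. The small program below (purely illustrative, not part of the commit) buckets a priority value into those bands using only the defines from this header.

```
#include <sys/types.h>
#include <sys/priority.h>

#include <stdio.h>

static const char *
pri_band(int pri)
{
	if (pri < PRI_MIN_REALTIME)
		return ("interrupt thread");
	if (pri < PRI_MIN_KERN)
		return ("realtime user");
	if (pri < PRI_MIN_TIMESHARE)
		return ("top-half kernel");
	if (pri < PRI_MIN_IDLE)
		return ("timesharing user");
	return ("idle");
}

int
main(void)
{
	printf("PSOCK=%d -> %s\n", PSOCK, pri_band(PSOCK));
	printf("PUSER=%d -> %s\n", PUSER, pri_band(PUSER));
	return (0);
}
```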

View File

@ -0,0 +1,351 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)protosw.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _SYS_PROTOSW_H_
#define _SYS_PROTOSW_H_
/* Forward declare these structures referenced from prototypes below. */
struct kaiocb;
struct mbuf;
struct thread;
struct sockaddr;
struct socket;
struct sockopt;
/*#ifdef _KERNEL*/
/*
* Protocol switch table.
*
* Each protocol has a handle initializing one of these structures,
* which is used for protocol-protocol and system-protocol communication.
*
* A protocol is called through the pr_init entry before any other.
* Thereafter it is called every 200ms through the pr_fasttimo entry and
* every 500ms through the pr_slowtimo for timer based actions.
* The system will call the pr_drain entry if it is low on space and
* this should throw away any non-critical data.
*
* Protocols pass data between themselves as chains of mbufs using
* the pr_input and pr_output hooks. Pr_input passes data up (towards
* the users) and pr_output passes it down (towards the interfaces); control
* information passes up and down on pr_ctlinput and pr_ctloutput.
* The protocol is responsible for the space occupied by any of the
* arguments to these entries and must dispose of it.
*
* In retrospect, it would be a lot nicer to use an interface
* similar to the vnode VOP interface.
*/
/* USE THESE FOR YOUR PROTOTYPES ! */
typedef int pr_input_t (struct mbuf **, int*, int);
typedef int pr_output_t (struct mbuf *, struct socket *, ...);
typedef void pr_ctlinput_t (int, struct sockaddr *, void *);
typedef int pr_ctloutput_t (struct socket *, struct sockopt *);
typedef void pr_init_t (void);
typedef void pr_fasttimo_t (void);
typedef void pr_slowtimo_t (void);
typedef void pr_drain_t (void);
struct protosw {
short pr_type; /* socket type used for */
struct domain *pr_domain; /* domain protocol a member of */
short pr_protocol; /* protocol number */
short pr_flags; /* see below */
/* protocol-protocol hooks */
pr_input_t *pr_input; /* input to protocol (from below) */
pr_output_t *pr_output; /* output to protocol (from above) */
pr_ctlinput_t *pr_ctlinput; /* control input (from below) */
pr_ctloutput_t *pr_ctloutput; /* control output (from above) */
/* utility hooks */
pr_init_t *pr_init;
pr_fasttimo_t *pr_fasttimo; /* fast timeout (200ms) */
pr_slowtimo_t *pr_slowtimo; /* slow timeout (500ms) */
pr_drain_t *pr_drain; /* flush any excess space possible */
struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */
};
/*#endif*/
#define PR_SLOWHZ 2 /* 2 slow timeouts per second */
#define PR_FASTHZ 5 /* 5 fast timeouts per second */
/*
* This number should be defined again within each protocol family to avoid
* confusion.
*/
#define PROTO_SPACER 32767 /* spacer for loadable protocols */
/*
* Values for pr_flags.
* PR_ADDR requires PR_ATOMIC;
* PR_ADDR and PR_CONNREQUIRED are mutually exclusive.
* PR_IMPLOPCL means that the protocol allows sendto without prior connect,
* and the protocol understands the MSG_EOF flag. The first property
* is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed
* anyhow).
*/
#define PR_ATOMIC 0x01 /* exchange atomic messages only */
#define PR_ADDR 0x02 /* addresses given with messages */
#define PR_CONNREQUIRED 0x04 /* connection required by protocol */
#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */
#define PR_RIGHTS 0x10 /* passes capabilities */
#define PR_IMPLOPCL 0x20 /* implied open/close */
#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */
/*
* In earlier BSD network stacks, a single pr_usrreq() function pointer was
* invoked with an operation number indicating what operation was desired.
* We now provide individual function pointers which protocols can implement,
* which offers a number of benefits (such as type checking for arguments).
* These older constants are still present in order to support TCP debugging.
*/
#define PRU_ATTACH 0 /* attach protocol to up */
#define PRU_DETACH 1 /* detach protocol from up */
#define PRU_BIND 2 /* bind socket to address */
#define PRU_LISTEN 3 /* listen for connection */
#define PRU_CONNECT 4 /* establish connection to peer */
#define PRU_ACCEPT 5 /* accept connection from peer */
#define PRU_DISCONNECT 6 /* disconnect from peer */
#define PRU_SHUTDOWN 7 /* won't send any more data */
#define PRU_RCVD 8 /* have taken data; more room now */
#define PRU_SEND 9 /* send this data */
#define PRU_ABORT 10 /* abort (fast DISCONNECT, DETACH) */
#define PRU_CONTROL 11 /* control operations on protocol */
#define PRU_SENSE 12 /* return status into m */
#define PRU_RCVOOB 13 /* retrieve out of band data */
#define PRU_SENDOOB 14 /* send out of band data */
#define PRU_SOCKADDR 15 /* fetch socket's address */
#define PRU_PEERADDR 16 /* fetch peer's address */
#define PRU_CONNECT2 17 /* connect two sockets */
/* begin for protocols internal use */
#define PRU_FASTTIMO 18 /* 200ms timeout */
#define PRU_SLOWTIMO 19 /* 500ms timeout */
#define PRU_PROTORCV 20 /* receive from below */
#define PRU_PROTOSEND 21 /* send to below */
/* end for protocol's internal use */
#define PRU_SEND_EOF 22 /* send and close */
#define PRU_SOSETLABEL 23 /* MAC label change */
#define PRU_CLOSE 24 /* socket close */
#define PRU_FLUSH 25 /* flush the socket */
#define PRU_NREQ 25
#ifdef PRUREQUESTS
const char *prurequests[] = {
"ATTACH", "DETACH", "BIND", "LISTEN",
"CONNECT", "ACCEPT", "DISCONNECT", "SHUTDOWN",
"RCVD", "SEND", "ABORT", "CONTROL",
"SENSE", "RCVOOB", "SENDOOB", "SOCKADDR",
"PEERADDR", "CONNECT2", "FASTTIMO", "SLOWTIMO",
"PROTORCV", "PROTOSEND", "SEND_EOF", "SOSETLABEL",
"CLOSE", "FLUSH",
};
#endif
#ifdef _KERNEL /* users shouldn't see this decl */
struct ifnet;
struct stat;
struct ucred;
struct uio;
/*
* If the ordering here looks odd, that's because it's alphabetical. These
* should eventually be merged back into struct protosw.
*
* Some fields initialized to defaults if they are NULL.
* See uipc_domain.c:net_init_domain()
*/
struct pr_usrreqs {
void (*pru_abort)(struct socket *so);
int (*pru_accept)(struct socket *so, struct sockaddr **nam);
int (*pru_attach)(struct socket *so, int proto, struct thread *td);
int (*pru_bind)(struct socket *so, struct sockaddr *nam,
struct thread *td);
int (*pru_connect)(struct socket *so, struct sockaddr *nam,
struct thread *td);
int (*pru_connect2)(struct socket *so1, struct socket *so2);
int (*pru_control)(struct socket *so, u_long cmd, caddr_t data,
struct ifnet *ifp, struct thread *td);
void (*pru_detach)(struct socket *so);
int (*pru_disconnect)(struct socket *so);
int (*pru_listen)(struct socket *so, int backlog,
struct thread *td);
int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam);
int (*pru_rcvd)(struct socket *so, int flags);
int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags);
int (*pru_send)(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control,
struct thread *td);
#define PRUS_OOB 0x1
#define PRUS_EOF 0x2
#define PRUS_MORETOCOME 0x4
#define PRUS_NOTREADY 0x8
int (*pru_ready)(struct socket *so, struct mbuf *m, int count);
int (*pru_sense)(struct socket *so, struct stat *sb);
int (*pru_shutdown)(struct socket *so);
int (*pru_flush)(struct socket *so, int direction);
int (*pru_sockaddr)(struct socket *so, struct sockaddr **nam);
int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
int (*pru_sopoll)(struct socket *so, int events,
struct ucred *cred, struct thread *td);
void (*pru_sosetlabel)(struct socket *so);
void (*pru_close)(struct socket *so);
int (*pru_bindat)(int fd, struct socket *so, struct sockaddr *nam,
struct thread *td);
int (*pru_connectat)(int fd, struct socket *so,
struct sockaddr *nam, struct thread *td);
int (*pru_aio_queue)(struct socket *so, struct kaiocb *job);
};
/*
* All nonvoid pru_*() functions below return EOPNOTSUPP.
*/
int pru_accept_notsupp(struct socket *so, struct sockaddr **nam);
int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job);
int pru_attach_notsupp(struct socket *so, int proto, struct thread *td);
int pru_bind_notsupp(struct socket *so, struct sockaddr *nam,
struct thread *td);
int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
struct thread *td);
int pru_connect_notsupp(struct socket *so, struct sockaddr *nam,
struct thread *td);
int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
struct thread *td);
int pru_connect2_notsupp(struct socket *so1, struct socket *so2);
int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
struct ifnet *ifp, struct thread *td);
int pru_disconnect_notsupp(struct socket *so);
int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td);
int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam);
int pru_rcvd_notsupp(struct socket *so, int flags);
int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags);
int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td);
int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count);
int pru_sense_null(struct socket *so, struct stat *sb);
int pru_shutdown_notsupp(struct socket *so);
int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam);
int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control, int flags,
struct thread *td);
int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
struct thread *td);
#endif /* _KERNEL */
/*
* The arguments to the ctlinput routine are
* (*protosw[].pr_ctlinput)(cmd, sa, arg);
* where cmd is one of the commands below, sa is a pointer to a sockaddr,
* and arg is a `void *' argument used within a protocol family.
*/
#define PRC_IFDOWN 0 /* interface transition */
#define PRC_ROUTEDEAD 1 /* select new route if possible ??? */
#define PRC_IFUP 2 /* interface has come back up */
/* was PRC_QUENCH2 3 DEC congestion bit says slow down */
/* was PRC_QUENCH 4 Deprecated by RFC 6633 */
#define PRC_MSGSIZE 5 /* message size forced drop */
#define PRC_HOSTDEAD 6 /* host appears to be down */
#define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */
#define PRC_UNREACH_NET 8 /* no route to network */
#define PRC_UNREACH_HOST 9 /* no route to host */
#define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */
#define PRC_UNREACH_PORT 11 /* bad port # */
/* was PRC_UNREACH_NEEDFRAG 12 (use PRC_MSGSIZE) */
#define PRC_UNREACH_SRCFAIL 13 /* source route failed */
#define PRC_REDIRECT_NET 14 /* net routing redirect */
#define PRC_REDIRECT_HOST 15 /* host routing redirect */
#define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */
#define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */
#define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */
#define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */
#define PRC_PARAMPROB 20 /* header incorrect */
#define PRC_UNREACH_ADMIN_PROHIB 21 /* packet administratively prohibited */
#define PRC_NCMDS 22
#define PRC_IS_REDIRECT(cmd) \
((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST)
#ifdef PRCREQUESTS
char *prcrequests[] = {
"IFDOWN", "ROUTEDEAD", "IFUP", "DEC-BIT-QUENCH2",
"QUENCH", "MSGSIZE", "HOSTDEAD", "#7",
"NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH",
"#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT",
"TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS",
"PARAMPROB", "ADMIN-UNREACH"
};
#endif
/*
* The arguments to ctloutput are:
* (*protosw[].pr_ctloutput)(req, so, level, optname, optval, p);
* req is one of the actions listed below, so is a (struct socket *),
* level is an indication of which protocol layer the option is intended for.
* optname is a protocol dependent socket option request,
* optval is a pointer to a mbuf-chain pointer, for value-return results.
* The protocol is responsible for disposal of the mbuf chain *optval
* if supplied;
* the caller is responsible for any space held by *optval when returned.
* A non-zero return from ctloutput gives a
* UNIX error number which should be passed to higher level software.
*/
#define PRCO_GETOPT 0
#define PRCO_SETOPT 1
#define PRCO_NCMDS 2
#ifdef PRCOREQUESTS
char *prcorequests[] = {
"GETOPT", "SETOPT",
};
#endif
#ifdef _KERNEL
void pfctlinput(int, struct sockaddr *);
void pfctlinput2(int, struct sockaddr *, void *);
struct domain *pffinddomain(int family);
struct protosw *pffindproto(int family, int protocol, int type);
struct protosw *pffindtype(int family, int type);
int pf_proto_register(int family, struct protosw *npr);
int pf_proto_unregister(int family, int protocol, int type);
#endif
#endif
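
To make the relationship between struct protosw, struct pr_usrreqs and the *_notsupp stubs above concrete, here is a hedged kernel-context sketch of how a hypothetical datagram protocol might wire itself up. The myproto_* names are invented; a real protocol would implement those handlers, fill in many more hooks, and register the switch with pf_proto_register().

```
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>

/* Invented handlers; a real protocol implements these elsewhere. */
int	myproto_attach(struct socket *, int, struct thread *);
void	myproto_detach(struct socket *);
int	myproto_input(struct mbuf **, int *, int);

static struct pr_usrreqs myproto_usrreqs = {
	.pru_attach   = myproto_attach,
	.pru_detach   = myproto_detach,
	/* Operations the protocol does not support fall back to EOPNOTSUPP. */
	.pru_bind     = pru_bind_notsupp,
	.pru_connect  = pru_connect_notsupp,
	.pru_peeraddr = pru_peeraddr_notsupp,
	.pru_sockaddr = pru_sockaddr_notsupp,
};

static struct protosw myproto_protosw = {
	.pr_type     = SOCK_DGRAM,
	.pr_protocol = 0,		/* placeholder number for the sketch */
	.pr_flags    = PR_ATOMIC | PR_ADDR,
	.pr_input    = myproto_input,
	.pr_usrreqs  = &myproto_usrreqs,
};
```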

View File

@ -34,6 +34,7 @@
#define _SYS_QUEUE_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
/*
* This file defines four types of data structures: singly-linked lists,

View File

@ -0,0 +1,191 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)resource.h 8.4 (Berkeley) 1/9/95
* $FreeBSD$
*/
#ifndef _SYS_RESOURCE_H_
#define _SYS_RESOURCE_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
#ifndef _ID_T_DECLARED
typedef __id_t id_t;
#define _ID_T_DECLARED
#endif
#ifndef _RLIM_T_DECLARED
typedef __rlim_t rlim_t;
#define _RLIM_T_DECLARED
#endif
/*
* Process priority specifications to get/setpriority.
*/
#define PRIO_MIN -20
#define PRIO_MAX 20
#define PRIO_PROCESS 0
#define PRIO_PGRP 1
#define PRIO_USER 2
/*
* Resource utilization information.
*
* All fields are only modified by curthread and
* no locks are required to read.
*/
#define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1
#define RUSAGE_THREAD 1
struct rusage {
struct timeval ru_utime; /* user time used */
struct timeval ru_stime; /* system time used */
long ru_maxrss; /* max resident set size */
#define ru_first ru_ixrss
long ru_ixrss; /* integral shared memory size */
long ru_idrss; /* integral unshared data " */
long ru_isrss; /* integral unshared stack " */
long ru_minflt; /* page reclaims */
long ru_majflt; /* page faults */
long ru_nswap; /* swaps */
long ru_inblock; /* block input operations */
long ru_oublock; /* block output operations */
long ru_msgsnd; /* messages sent */
long ru_msgrcv; /* messages received */
long ru_nsignals; /* signals received */
long ru_nvcsw; /* voluntary context switches */
long ru_nivcsw; /* involuntary " */
#define ru_last ru_nivcsw
};
#if __BSD_VISIBLE
struct __wrusage {
struct rusage wru_self;
struct rusage wru_children;
};
#endif
/*
* Resource limits
*/
#define RLIMIT_CPU 0 /* maximum cpu time in seconds */
#define RLIMIT_FSIZE 1 /* maximum file size */
#define RLIMIT_DATA 2 /* data size */
#define RLIMIT_STACK 3 /* stack size */
#define RLIMIT_CORE 4 /* core file size */
#define RLIMIT_RSS 5 /* resident set size */
#define RLIMIT_MEMLOCK 6 /* locked-in-memory address space */
#define RLIMIT_NPROC 7 /* number of processes */
#define RLIMIT_NOFILE 8 /* number of open files */
#define RLIMIT_SBSIZE 9 /* maximum size of all socket buffers */
#define RLIMIT_VMEM 10 /* virtual process size (incl. mmap) */
#define RLIMIT_AS RLIMIT_VMEM /* standard name for RLIMIT_VMEM */
#define RLIMIT_NPTS 11 /* pseudo-terminals */
#define RLIMIT_SWAP 12 /* swap used */
#define RLIMIT_KQUEUES 13 /* kqueues allocated */
#define RLIMIT_UMTXP 14 /* process-shared umtx */
#define RLIM_NLIMITS 15 /* number of resource limits */
#define RLIM_INFINITY ((rlim_t)(((__uint64_t)1 << 63) - 1))
/* XXX Missing: RLIM_SAVED_MAX, RLIM_SAVED_CUR */
/*
* Resource limit string identifiers
*/
#ifdef _RLIMIT_IDENT
static const char *rlimit_ident[RLIM_NLIMITS] = {
"cpu",
"fsize",
"data",
"stack",
"core",
"rss",
"memlock",
"nproc",
"nofile",
"sbsize",
"vmem",
"npts",
"swap",
"kqueues",
"umtx",
};
#endif
struct rlimit {
rlim_t rlim_cur; /* current (soft) limit */
rlim_t rlim_max; /* maximum value for rlim_cur */
};
#if __BSD_VISIBLE
struct orlimit {
__int32_t rlim_cur; /* current (soft) limit */
__int32_t rlim_max; /* maximum value for rlim_cur */
};
struct loadavg {
__fixpt_t ldavg[3];
long fscale;
};
#define CP_USER 0
#define CP_NICE 1
#define CP_SYS 2
#define CP_INTR 3
#define CP_IDLE 4
#define CPUSTATES 5
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
extern struct loadavg averunnable;
void read_cpu_time(long *cp_time); /* Writes array of CPUSTATES */
#else
__BEGIN_DECLS
/* XXX 2nd arg to [gs]etpriority() should be an id_t */
int getpriority(int, int);
int getrlimit(int, struct rlimit *);
int getrusage(int, struct rusage *);
int setpriority(int, int, int);
int setrlimit(int, const struct rlimit *);
__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCE_H_ */
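
As a quick illustration of the userland interfaces declared above, the small runnable program below raises its soft RLIMIT_NOFILE up to the hard limit and prints its own resource usage. It is a minimal sketch, not part of the commit.

```
#include <sys/time.h>
#include <sys/resource.h>

#include <stdio.h>

int
main(void)
{
	struct rlimit rl;
	struct rusage ru;

	/* The soft limit may be raised at most up to the hard limit. */
	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
		rl.rlim_cur = rl.rlim_max;
		if (setrlimit(RLIMIT_NOFILE, &rl) != 0)
			perror("setrlimit");
	}

	if (getrusage(RUSAGE_SELF, &ru) == 0)
		printf("user %ld.%06lds  sys %ld.%06lds  maxrss %ld kB\n",
		    (long)ru.ru_utime.tv_sec, (long)ru.ru_utime.tv_usec,
		    (long)ru.ru_stime.tv_sec, (long)ru.ru_stime.tv_usec,
		    ru.ru_maxrss);
	return (0);
}
```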

View File

@ -0,0 +1,166 @@
/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)resourcevar.h 8.4 (Berkeley) 1/9/95
* $FreeBSD$
*/
#ifndef _SYS_RESOURCEVAR_H_
#define _SYS_RESOURCEVAR_H_
#include <sys/resource.h>
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/_lock.h>
#include <sys/_mutex.h>
#endif
/*
* Kernel per-process accounting / statistics
* (not necessarily resident except when running).
*
* Locking key:
* b - created at fork, never changes
* c - locked by proc mtx
* k - only accessed by curthread
* w - locked by proc itim lock
* w2 - locked by proc prof lock
*/
struct pstats {
#define pstat_startzero p_cru
struct rusage p_cru; /* Stats for reaped children. */
struct itimerval p_timer[3]; /* (w) Virtual-time timers. */
#define pstat_endzero pstat_startcopy
#define pstat_startcopy p_prof
struct uprof { /* Profile arguments. */
caddr_t pr_base; /* (c + w2) Buffer base. */
u_long pr_size; /* (c + w2) Buffer size. */
u_long pr_off; /* (c + w2) PC offset. */
u_long pr_scale; /* (c + w2) PC scaling. */
} p_prof;
#define pstat_endcopy p_start
struct timeval p_start; /* (b) Starting time. */
};
#ifdef _KERNEL
/*
* Kernel shareable process resource limits. Because this structure
* is moderately large but changes infrequently, it is normally
* shared copy-on-write after forks.
*/
struct plimit {
struct rlimit pl_rlimit[RLIM_NLIMITS];
int pl_refcnt; /* number of references */
};
struct racct;
/*-
* Per uid resource consumption. This structure is used to track
* the total resource consumption (process count, socket buffer size,
* etc) for the uid and impose limits.
*
* Locking guide:
* (a) Constant from inception
* (b) Lockless, updated using atomics
* (c) Locked by global uihashtbl_lock
* (d) Locked by the ui_vmsize_mtx
*/
struct uidinfo {
LIST_ENTRY(uidinfo) ui_hash; /* (c) hash chain of uidinfos */
struct mtx ui_vmsize_mtx;
vm_ooffset_t ui_vmsize; /* (d) swap reservation by uid */
long ui_sbsize; /* (b) socket buffer space consumed */
long ui_proccnt; /* (b) number of processes */
long ui_ptscnt; /* (b) number of pseudo-terminals */
long ui_kqcnt; /* (b) number of kqueues */
long ui_umtxcnt; /* (b) number of shared umtxs */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
#ifdef RACCT
struct racct *ui_racct; /* (a) resource accounting */
#endif
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
#define UIDINFO_VMSIZE_UNLOCK(ui) mtx_unlock(&((ui)->ui_vmsize_mtx))
struct proc;
struct rusage_ext;
struct thread;
void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks);
void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks);
void calccru(struct proc *p, struct timeval *up, struct timeval *sp);
void calcru(struct proc *p, struct timeval *up, struct timeval *sp);
int chgkqcnt(struct uidinfo *uip, int diff, rlim_t max);
int chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to,
rlim_t maxval);
int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int fuswintr(void *base);
int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
struct rlimit *limp);
struct plimit
*lim_alloc(void);
void lim_copy(struct plimit *dst, struct plimit *src);
rlim_t lim_cur(struct thread *td, int which);
rlim_t lim_cur_proc(struct proc *p, int which);
void lim_fork(struct proc *p1, struct proc *p2);
void lim_free(struct plimit *limp);
struct plimit
*lim_hold(struct plimit *limp);
rlim_t lim_max(struct thread *td, int which);
rlim_t lim_max_proc(struct proc *p, int which);
void lim_rlimit(struct thread *td, int which, struct rlimit *rlp);
void lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp);
void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
struct rusage_ext *rux2);
void rucollect(struct rusage *ru, struct rusage *ru2);
void rufetch(struct proc *p, struct rusage *ru);
void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
void rufetchtd(struct thread *td, struct rusage *ru);
void ruxagg(struct proc *p, struct thread *td);
int suswintr(void *base, int word);
struct uidinfo
*uifind(uid_t uid);
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
#ifdef RACCT
void ui_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
void *arg2, void *arg3);
#endif
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */

View File

@ -0,0 +1,61 @@
/*-
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)select.h 8.2 (Berkeley) 1/4/94
* $FreeBSD$
*/
#ifndef _SYS_SELINFO_H_
#define _SYS_SELINFO_H_
#include <sys/event.h> /* for struct klist */
struct selfd;
TAILQ_HEAD(selfdlist, selfd);
/*
* Used to maintain information about processes that wish to be
* notified when I/O becomes possible.
*/
struct selinfo {
struct selfdlist si_tdlist; /* List of sleeping threads. */
struct knlist si_note; /* kernel note list */
struct mtx *si_mtx; /* Lock for tdlist. */
};
#define SEL_WAITING(si) (!TAILQ_EMPTY(&(si)->si_tdlist))
#ifdef _KERNEL
void seldrain(struct selinfo *sip);
void selrecord(struct thread *selector, struct selinfo *sip);
void selwakeup(struct selinfo *sip);
void selwakeuppri(struct selinfo *sip, int pri);
void seltdfini(struct thread *td);
#endif
#endif /* !_SYS_SELINFO_H_ */
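
The selrecord()/selwakeup() pair above is normally used from a driver's poll handler and its data-ready path; the kernel-context sketch below shows that pairing. The mydev_* structure, its mutex and the readable test are all invented for illustration.

```
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>

struct mydev_softc {
	struct mtx	sc_mtx;
	struct selinfo	sc_rsel;	/* threads waiting in select/poll */
	int		sc_nread;	/* bytes ready to read */
};

static int
mydev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct mydev_softc *sc = dev->si_drv1;
	int revents = 0;

	mtx_lock(&sc->sc_mtx);
	if (events & (POLLIN | POLLRDNORM)) {
		if (sc->sc_nread > 0)
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(td, &sc->sc_rsel);	/* remember this thread */
	}
	mtx_unlock(&sc->sc_mtx);
	return (revents);
}

/* Called from the data path once new data arrives. */
static void
mydev_data_ready(struct mydev_softc *sc)
{
	selwakeup(&sc->sc_rsel);
}
```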

View File

@ -0,0 +1,203 @@
/*-
* Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_SF_BUF_H_
#define _SYS_SF_BUF_H_
struct sfstat { /* sendfile statistics */
uint64_t sf_syscalls; /* times sendfile was called */
uint64_t sf_noiocnt; /* times sendfile didn't require I/O */
uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
uint64_t sf_pages_read; /* pages read as part of a request */
uint64_t sf_pages_valid; /* pages were valid for a request */
uint64_t sf_rhpages_requested; /* readahead pages requested */
uint64_t sf_rhpages_read; /* readahead pages read */
uint64_t sf_busy; /* times aborted on a busy page */
uint64_t sf_allocfail; /* times sfbuf allocation failed */
uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
};
#ifdef _KERNEL
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
/*
* Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped
* into kernel address space. Note that they aren't used only
* by sendfile(2)!
*
* Sf_bufs could be implemented as a feature of vm_page_t, but that
* would require growth of the structure. That's why they are implemented
* as a separate hash indexed by vm_page address. Implementation lives in
* kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map,
* so they don't require this hash at all, thus ignore subr_sfbuf.c.
*
* Different 32-bit architectures place different requirements on the sf_buf
* hash and functions. They request features in machine/vmparam.h, which
* enable parts of this file. They can also optionally provide helpers in
* machine/sf_buf.h
*
* Defines are:
* SFBUF This machine requires sf_buf hash.
* subr_sfbuf.c should be compiled.
* SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings,
* that do not invalidate the cache on the rest of the CPUs.
* SFBUF_NOMD This machine doesn't have machine/sf_buf.h
*
* SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as a boolean
* that tells at run time whether the machine
* is capable of a direct map.
* SFBUF_MAP This machine provides its own sf_buf_map() and
* sf_buf_unmap().
* SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page()
* function.
*/
#ifdef SFBUF
#if defined(SMP) && defined(SFBUF_CPUSET)
#include <sys/_cpuset.h>
#endif
#include <sys/queue.h>
struct sf_buf {
LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
vm_page_t m; /* currently mapped page */
vm_offset_t kva; /* va of mapping */
int ref_count; /* usage of this mapping */
#if defined(SMP) && defined(SFBUF_CPUSET)
cpuset_t cpumask; /* where mapping is valid */
#endif
};
#else /* ! SFBUF */
struct sf_buf;
#endif /* SFBUF */
#ifndef SFBUF_NOMD
#include <machine/sf_buf.h>
#endif
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
#include <machine/md_var.h>
#endif
#ifdef SFBUF
struct sf_buf *sf_buf_alloc(struct vm_page *, int);
void sf_buf_free(struct sf_buf *);
void sf_buf_ref(struct sf_buf *);
static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
if (SFBUF_OPTIONAL_DIRECT_MAP)
return (SFBUF_PHYS_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
#endif
return (sf->kva);
}
static inline vm_page_t
sf_buf_page(struct sf_buf *sf)
{
#ifdef SFBUF_OPTIONAL_DIRECT_MAP
if (SFBUF_OPTIONAL_DIRECT_MAP)
return ((vm_page_t)sf);
#endif
return (sf->m);
}
#ifndef SFBUF_MAP
#include <vm/pmap.h>
static inline void
sf_buf_map(struct sf_buf *sf, int flags)
{
pmap_qenter(sf->kva, &sf->m, 1);
}
static inline int
sf_buf_unmap(struct sf_buf *sf)
{
return (0);
}
#endif /* SFBUF_MAP */
#if defined(SMP) && defined(SFBUF_CPUSET)
void sf_buf_shootdown(struct sf_buf *, int);
#endif
#ifdef SFBUF_PROCESS_PAGE
boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
#endif
#else /* ! SFBUF */
static inline struct sf_buf *
sf_buf_alloc(struct vm_page *m, int pri)
{
return ((struct sf_buf *)m);
}
static inline void
sf_buf_free(struct sf_buf *sf)
{
}
static inline void
sf_buf_ref(struct sf_buf *sf)
{
}
#endif /* SFBUF */
/*
* Options to sf_buf_alloc() are specified through its flags argument. This
* argument's value should be the result of a bitwise or'ing of one or more
* of the following values.
*/
#define SFB_CATCH 1 /* Check signals if the allocation
sleeps. */
#define SFB_CPUPRIVATE 2 /* Create a CPU private mapping. */
#define SFB_DEFAULT 0
#define SFB_NOWAIT 4 /* Return NULL if all bufs are used. */
extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
#define SFSTAT_ADD(name, val) \
counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
(val))
#define SFSTAT_INC(name) SFSTAT_ADD(name, 1)
#endif /* _KERNEL */
#endif /* !_SYS_SF_BUF_H_ */
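
Putting the allocator above to use typically looks like the kernel-context sketch below: map a vm_page, copy through the returned KVA, then release the mapping. copy_page_prefix() is an invented helper, and SFB_NOWAIT is chosen so the caller can handle allocation failure instead of sleeping.

```
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sf_buf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

static int
copy_page_prefix(vm_page_t m, void *dst, size_t len)
{
	struct sf_buf *sf;

	sf = sf_buf_alloc(m, SFB_NOWAIT);	/* NULL if all sf_bufs are busy */
	if (sf == NULL)
		return (ENOMEM);
	bcopy((void *)sf_buf_kva(sf), dst, len);
	sf_buf_free(sf);
	return (0);
}
```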

View File

@ -0,0 +1,259 @@
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
*
* $FreeBSD$
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
#include <sys/_task.h>
#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
/*
* Constants for sb_flags field of struct sockbuf.
*/
#define SB_WAIT 0x04 /* someone is waiting for data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
#define SB_UPCALL 0x20 /* someone wants an upcall */
#define SB_NOINTR 0x40 /* operations not interruptible */
#define SB_AIO 0x80 /* AIO operations queued */
#define SB_KNOTE 0x100 /* kernel note attached */
#define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */
#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */
#define SB_AUTOSIZE 0x800 /* automatically size socket buffer */
#define SB_STOP 0x1000 /* backpressure indicator */
#define SB_AIO_RUNNING 0x2000 /* AIO operation running */
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
#define SBS_RCVATMARK 0x0040 /* at mark on input */
struct mbuf;
struct sockaddr;
struct socket;
struct thread;
struct xsockbuf {
u_int sb_cc;
u_int sb_hiwat;
u_int sb_mbcnt;
u_int sb_mcnt;
u_int sb_ccnt;
u_int sb_mbmax;
int sb_lowat;
int sb_timeo;
short sb_flags;
};
/*
* Variables for socket buffering.
*
* Locking key to struct sockbuf:
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
struct selinfo sb_sel; /* process selecting read/write */
struct mtx sb_mtx; /* sockbuf lock */
struct sx sb_sx; /* prevent I/O interlacing */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */
struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
struct mbuf *sb_lastrecord; /* (a) first mbuf of last
* record in socket buffer */
struct mbuf *sb_sndptr; /* (a) pointer into mbuf chain */
struct mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */
u_int sb_sndptroff; /* (a) byte offset of ptr into chain */
u_int sb_acc; /* (a) available chars in buffer */
u_int sb_ccc; /* (a) claimed chars in buffer */
u_int sb_hiwat; /* (a) max actual char count */
u_int sb_mbcnt; /* (a) chars of mbufs used */
u_int sb_mcnt; /* (a) number of mbufs in buffer */
u_int sb_ccnt; /* (a) number of clusters in buffer */
u_int sb_mbmax; /* (a) max chars of mbufs to use */
u_int sb_ctl; /* (a) non-data chars in buffer */
int sb_lowat; /* (a) low water mark */
sbintime_t sb_timeo; /* (a) timeout for read/write */
short sb_flags; /* (a) flags, see below */
int (*sb_upcall)(struct socket *, void *, int); /* (a) */
void *sb_upcallarg; /* (a) */
TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
struct task sb_aiotask; /* AIO task */
};
#ifdef _KERNEL
/*
* Per-socket buffer mutex used to protect most fields in the socket
* buffer.
*/
#define SOCKBUF_MTX(_sb) (&(_sb)->sb_mtx)
#define SOCKBUF_LOCK_INIT(_sb, _name) \
mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
#define SOCKBUF_LOCK_DESTROY(_sb) mtx_destroy(SOCKBUF_MTX(_sb))
#define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb))
#define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb))
#define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb))
#define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
/*
* Socket buffer private mbuf(9) flags.
*/
#define M_NOTREADY M_PROTO1 /* m_data not populated yet */
#define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */
#define M_NOTAVAIL (M_NOTREADY | M_BLOCKED)
void sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
struct mbuf *m0, struct mbuf *control);
int sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control);
void sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
void sbdestroy(struct sockbuf *sb, struct socket *so);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len);
void sbdroprecord(struct sockbuf *sb);
void sbdroprecord_locked(struct sockbuf *sb);
void sbflush(struct sockbuf *sb);
void sbflush_locked(struct sockbuf *sb);
void sbrelease(struct sockbuf *sb, struct socket *so);
void sbrelease_internal(struct sockbuf *sb, struct socket *so);
void sbrelease_locked(struct sockbuf *sb, struct socket *so);
int sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
struct thread *td);
int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
struct thread *td);
struct mbuf *
sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
struct mbuf *
sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
int sbwait(struct sockbuf *sb);
int sblock(struct sockbuf *sb, int flags);
void sbunlock(struct sockbuf *sb);
void sballoc(struct sockbuf *, struct mbuf *);
void sbfree(struct sockbuf *, struct mbuf *);
int sbready(struct sockbuf *, struct mbuf *, int);
/*
* Return how much data is available to be taken out of socket
* buffer right now.
*/
static inline u_int
sbavail(struct sockbuf *sb)
{
#if 0
SOCKBUF_LOCK_ASSERT(sb);
#endif
return (sb->sb_acc);
}
/*
* Return how much data sits there in the socket buffer
* It might be that some data is not yet ready to be read.
*/
static inline u_int
sbused(struct sockbuf *sb)
{
#if 0
SOCKBUF_LOCK_ASSERT(sb);
#endif
return (sb->sb_ccc);
}
/*
* How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
* This is problematical if the fields are unsigned, as the space might
* still be negative (ccc > hiwat or mbcnt > mbmax).
*/
static inline long
sbspace(struct sockbuf *sb)
{
int bleft, mleft; /* size should match sockbuf fields */
#if 0
SOCKBUF_LOCK_ASSERT(sb);
#endif
if (sb->sb_flags & SB_STOP)
return(0);
bleft = sb->sb_hiwat - sb->sb_ccc;
mleft = sb->sb_mbmax - sb->sb_mbcnt;
return ((bleft < mleft) ? bleft : mleft);
}
#define SB_EMPTY_FIXUP(sb) do { \
if ((sb)->sb_mb == NULL) { \
(sb)->sb_mbtail = NULL; \
(sb)->sb_lastrecord = NULL; \
} \
} while (/*CONSTCOND*/0)
#ifdef SOCKBUF_DEBUG
void sblastrecordchk(struct sockbuf *, const char *, int);
void sblastmbufchk(struct sockbuf *, const char *, int);
void sbcheck(struct sockbuf *, const char *, int);
#define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__)
#define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__)
#define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__)
#else
#define SBLASTRECORDCHK(sb) do {} while (0)
#define SBLASTMBUFCHK(sb) do {} while (0)
#define SBCHECK(sb) do {} while (0)
#endif /* SOCKBUF_DEBUG */
#endif /* _KERNEL */
#endif /* _SYS_SOCKBUF_H_ */
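Illustration (not part of this header): sbavail() reports the bytes a reader may take now (sb_acc), sbused() reports everything the buffer has claimed including not-yet-ready mbufs (sb_ccc), and sbspace() is the smaller of the byte headroom and the mbuf-accounting headroom. The standalone sketch below models that arithmetic on a toy struct whose field names are copied from struct sockbuf; everything else is invented for the example.

```
/* Toy model of the accounting behind sbavail()/sbused()/sbspace().
 * Standalone illustration only; not kernel code. */
#include <stdio.h>

struct toy_sockbuf {
	unsigned int sb_acc;	/* bytes available to the reader */
	unsigned int sb_ccc;	/* bytes claimed, incl. not-ready data */
	unsigned int sb_hiwat;	/* byte high-water mark */
	unsigned int sb_mbcnt;	/* bytes of mbuf storage in use */
	unsigned int sb_mbmax;	/* limit on mbuf storage */
};

static unsigned int toy_sbavail(const struct toy_sockbuf *sb) { return sb->sb_acc; }
static unsigned int toy_sbused(const struct toy_sockbuf *sb)  { return sb->sb_ccc; }

/* Same min(byte headroom, mbuf headroom) rule as sbspace(). */
static long
toy_sbspace(const struct toy_sockbuf *sb)
{
	long bleft = (long)sb->sb_hiwat - (long)sb->sb_ccc;
	long mleft = (long)sb->sb_mbmax - (long)sb->sb_mbcnt;

	return (bleft < mleft) ? bleft : mleft;
}

int
main(void)
{
	/* 4 KiB claimed by the buffer, 1 KiB of it not yet readable. */
	struct toy_sockbuf sb = {
		.sb_acc = 3072, .sb_ccc = 4096, .sb_hiwat = 65536,
		.sb_mbcnt = 8192, .sb_mbmax = 131072,
	};

	printf("readable now: %u\n", toy_sbavail(&sb));		/* 3072 */
	printf("claimed:      %u\n", toy_sbused(&sb));		/* 4096 */
	printf("space left:   %ld\n", toy_sbspace(&sb));	/* 61440 */
	return (0);
}
```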

View File

@@ -600,5 +600,48 @@ struct mmsghdr {
};
#endif /* __BSD_VISIBLE */
#ifndef _KERNEL
#include <sys/cdefs.h>
__BEGIN_DECLS
int accept(int, struct sockaddr * __restrict, socklen_t * __restrict);
int bind(int, const struct sockaddr *, socklen_t);
int connect(int, const struct sockaddr *, socklen_t);
#if __BSD_VISIBLE
int accept4(int, struct sockaddr * __restrict, socklen_t * __restrict, int);
int bindat(int, int, const struct sockaddr *, socklen_t);
int connectat(int, int, const struct sockaddr *, socklen_t);
#endif
int getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
int getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
int getsockopt(int, int, int, void * __restrict, socklen_t * __restrict);
int listen(int, int);
ssize_t recv(int, void *, size_t, int);
ssize_t recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
ssize_t recvmsg(int, struct msghdr *, int);
#if __BSD_VISIBLE
struct timespec;
ssize_t recvmmsg(int, struct mmsghdr * __restrict, size_t, int,
const struct timespec * __restrict);
#endif
ssize_t send(int, const void *, size_t, int);
ssize_t sendto(int, const void *,
size_t, int, const struct sockaddr *, socklen_t);
ssize_t sendmsg(int, const struct msghdr *, int);
#if __BSD_VISIBLE
int sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
ssize_t sendmmsg(int, struct mmsghdr * __restrict, size_t, int);
int setfib(int);
#endif
int setsockopt(int, int, int, const void *, socklen_t);
int shutdown(int, int);
int sockatmark(int);
int socket(int, int, int);
int socketpair(int, int, int, int *);
__END_DECLS
#endif /* !_KERNEL */
#endif /* !_COMPAT_SYS_SOCKET_H_ */

View File

@@ -0,0 +1,422 @@
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
*
* $FreeBSD$
*/
#ifndef _SYS_SOCKETVAR_H_
#define _SYS_SOCKETVAR_H_
#include <sys/queue.h> /* for TAILQ macros */
#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/osd.h>
#include <sys/_sx.h>
#include <sys/sockbuf.h>
#include <sys/sockstate.h>
#ifdef _KERNEL
#include <sys/caprights.h>
#include <sys/sockopt.h>
#endif
struct vnet;
/*
* Kernel structure per socket.
* Contains send and receive buffer queues,
* handle on protocol and pointer to protocol
* private data and error information.
*/
typedef u_quad_t so_gen_t;
struct socket;
/*-
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
* (c) locked by SOCKBUF_LOCK(&so->so_rcv).
* (e) locked by ACCEPT_LOCK().
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
struct socket {
int so_count; /* (b) reference count */
short so_type; /* (a) generic type, see socket.h */
short so_options; /* from socket call, see socket.h */
short so_linger; /* time to linger while closing */
short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.
* Socket where accepts occur is so_head in all subsidiary sockets.
* If so_head is 0, socket is not related to an accept.
* For head socket so_incomp queues partially completed connections,
* while so_comp is a queue of connections ready to be accepted.
* If a connection is aborted and it has so_head set, then
* it has to be pulled out of either so_incomp or so_comp.
* We allow connections to queue up based on current queue lengths
* and limit on number of queued connections for this socket.
*/
struct socket *so_head; /* (e) back pointer to listen socket */
TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
u_int so_qlen; /* (e) number of unaccepted connections */
u_int so_incqlen; /* (e) number of unaccepted incomplete
connections */
u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
u_long so_oobmark; /* (c) chars to oob mark */
struct sockbuf so_rcv, so_snd;
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
struct so_accf {
struct accept_filter *so_accept_filter;
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
* some user-specified metadata to a socket, which then can be
* used by the kernel for various actions.
* so_user_cookie is used by ipfw/dummynet.
*/
int so_fibnum; /* routing domain for this socket */
uint32_t so_user_cookie;
void *so_pspare[2]; /* packet pacing / general use */
int so_ispare[2]; /* packet pacing / general use */
};
/*
* Global accept mutex to serialize access to accept queues and
* fields associated with multiple sockets. This allows us to
* avoid defining a lock order between listen and accept sockets
* until such time as it proves to be a good idea.
*/
extern struct mtx accept_mtx;
#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
/*
* Per-socket mutex: we reuse the receive socket buffer mutex for space
* efficiency. This decision should probably be revisited as we optimize
* locking for the socket code.
*/
#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
/*
* Socket state bits stored in so_qstate.
*/
#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
#define SQ_COMP 0x1000 /* unaccepted, complete connection */
/*
* Externalized form of struct socket used by the sysctl(3) interface.
*/
struct xsocket {
size_t xso_len; /* length of this structure */
struct socket *xso_so; /* makes a convenient handle sometimes */
short so_type;
short so_options;
short so_linger;
short so_state;
caddr_t so_pcb; /* another convenient handle */
int xso_protocol;
int xso_family;
u_int so_qlen;
u_int so_incqlen;
u_int so_qlimit;
short so_timeo;
u_short so_error;
pid_t so_pgid;
u_long so_oobmark;
struct xsockbuf so_rcv, so_snd;
uid_t so_uid; /* XXX */
};
#ifdef _KERNEL
/*
* Macros for sockets and socket buffering.
*/
/*
* Flags to sblock().
*/
#define SBL_WAIT 0x00000001 /* Wait if not immediately available. */
#define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */
#define SBL_VALID (SBL_WAIT | SBL_NOINTR)
/*
* Do we need to notify the other side when I/O is possible?
*/
#define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)
/* do we have to send all at once on a socket? */
#define sosendallatonce(so) \
((so)->so_proto->pr_flags & PR_ATOMIC)
/* can we read something from so? */
#define soreadabledata(so) \
(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
/* can we write something to so? */
#define sowriteable(so) \
((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
(((so)->so_state&SS_ISCONNECTED) || \
((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
(so)->so_error)
/*
* soref()/sorele() ref-count the socket structure. Note that you must
* still explicitly close the socket, but the last ref count will free
* the structure.
*/
#define soref(so) do { \
SOCK_LOCK_ASSERT(so); \
++(so)->so_count; \
} while (0)
#define sorele(so) do { \
ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count <= 0) \
panic("sorele"); \
if (--(so)->so_count == 0) \
sofree(so); \
else { \
SOCK_UNLOCK(so); \
ACCEPT_UNLOCK(); \
} \
} while (0)
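Illustration (not part of this header): soref() and sorele() are a lock-protected reference count where the release that reaches zero frees the socket via sofree(); the kernel macros additionally require the caller to already hold the accept and socket locks. Below is a hedged standalone analogue of the same drop-to-zero-frees pattern, using a pthread mutex and an invented toy object.

```
/* Standalone analogue of the soref()/sorele() pattern: references are
 * taken under a lock and the release that hits zero frees the object. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_obj {
	pthread_mutex_t lock;
	int refcount;
};

static struct toy_obj *
toy_alloc(void)
{
	struct toy_obj *o = calloc(1, sizeof(*o));

	if (o == NULL)
		abort();
	pthread_mutex_init(&o->lock, NULL);
	o->refcount = 1;		/* creator holds the first reference */
	return (o);
}

static void
toy_ref(struct toy_obj *o)		/* analogue of soref() */
{
	pthread_mutex_lock(&o->lock);
	o->refcount++;
	pthread_mutex_unlock(&o->lock);
}

static void
toy_rele(struct toy_obj *o)		/* analogue of sorele() */
{
	pthread_mutex_lock(&o->lock);
	if (o->refcount <= 0)
		abort();		/* mirrors the panic("sorele") check */
	if (--o->refcount == 0) {
		pthread_mutex_unlock(&o->lock);
		pthread_mutex_destroy(&o->lock);
		free(o);		/* final release frees, like sofree() */
		return;
	}
	pthread_mutex_unlock(&o->lock);
}

int
main(void)
{
	struct toy_obj *o = toy_alloc();

	toy_ref(o);			/* a second holder appears */
	toy_rele(o);			/* ...and drops its reference */
	toy_rele(o);			/* final release frees the object */
	puts("object released");
	return (0);
}
```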
/*
* In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
* avoid a non-atomic test-and-wakeup. However, sowakeup is
* responsible for releasing the lock if it is called. We unlock only
* if we don't call into sowakeup. If any code is introduced that
* directly invokes the underlying sowakeup() primitives, it must
* maintain the same semantics.
*/
#define sorwakeup_locked(so) do { \
SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \
if (sb_notify(&(so)->so_rcv)) \
sowakeup((so), &(so)->so_rcv); \
else \
SOCKBUF_UNLOCK(&(so)->so_rcv); \
} while (0)
#define sorwakeup(so) do { \
SOCKBUF_LOCK(&(so)->so_rcv); \
sorwakeup_locked(so); \
} while (0)
#define sowwakeup_locked(so) do { \
SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \
if (sb_notify(&(so)->so_snd)) \
sowakeup((so), &(so)->so_snd); \
else \
SOCKBUF_UNLOCK(&(so)->so_snd); \
} while (0)
#define sowwakeup(so) do { \
SOCKBUF_LOCK(&(so)->so_snd); \
sowwakeup_locked(so); \
} while (0)
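Illustration (not part of this header): the wakeup macros use a hand-off convention in which the caller enters with the buffer lock held and either the callee (sowakeup()) is responsible for dropping it, or the caller drops it itself when nobody needs waking. A minimal standalone analogue with pthreads; all names below are invented for the example.

```
/* Analogue of the sorwakeup()/sowwakeup() hand-off: notify_locked() is
 * entered with the lock held and is responsible for dropping it. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct waitq {
	pthread_mutex_t lock;
	pthread_cond_t cv;
	bool have_waiter;
	bool ready;
};

/* Called with q->lock held; always returns with it released. */
static void
notify_locked(struct waitq *q)
{
	q->ready = true;
	pthread_cond_signal(&q->cv);
	pthread_mutex_unlock(&q->lock);
}

/* Analogue of sorwakeup(): lock, then either hand off or unlock. */
static void
notify(struct waitq *q)
{
	pthread_mutex_lock(&q->lock);
	if (q->have_waiter)
		notify_locked(q);		/* callee drops the lock */
	else
		pthread_mutex_unlock(&q->lock);	/* nothing to wake */
}

int
main(void)
{
	struct waitq q;

	pthread_mutex_init(&q.lock, NULL);
	pthread_cond_init(&q.cv, NULL);
	q.have_waiter = true;	/* pretend a reader is asleep in sbwait() */
	q.ready = false;

	notify(&q);
	printf("ready=%d\n", q.ready);

	pthread_cond_destroy(&q.cv);
	pthread_mutex_destroy(&q.lock);
	return (0);
}
```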
struct accept_filter {
char accf_name[16];
int (*accf_callback)
(struct socket *so, void *arg, int waitflag);
void * (*accf_create)
(struct socket *so, char *arg);
void (*accf_destroy)
(struct socket *so);
SLIST_ENTRY(accept_filter) accf_next;
};
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ACCF);
MALLOC_DECLARE(M_PCB);
MALLOC_DECLARE(M_SONAME);
#endif
/*
* Socket specific helper hook point identifiers
* Do not leave holes in the sequence, hook registration is a loop.
*/
#define HHOOK_SOCKET_OPT 0
#define HHOOK_SOCKET_CREATE 1
#define HHOOK_SOCKET_RCV 2
#define HHOOK_SOCKET_SND 3
#define HHOOK_FILT_SOREAD 4
#define HHOOK_FILT_SOWRITE 5
#define HHOOK_SOCKET_CLOSE 6
#define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE
struct socket_hhook_data {
struct socket *so;
struct mbuf *m;
void *hctx; /* hook point specific data*/
int status;
};
extern int maxsockets;
extern u_long sb_max;
extern so_gen_t so_gencnt;
struct file;
struct filedesc;
struct mbuf;
struct sockaddr;
struct ucred;
struct uio;
/* 'which' values for socket upcalls. */
#define SO_RCV 1
#define SO_SND 2
/* Return values for socket upcalls. */
#define SU_OK 0
#define SU_ISCONNECTED 1
/*
* From uipc_socket and friends
*/
int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
struct file **fpp, u_int *fflagp);
void soabort(struct socket *so);
int soaccept(struct socket *so, struct sockaddr **nam);
void soaio_enqueue(struct task *task);
void soaio_rcv(void *context, int pending);
void soaio_snd(void *context, int pending);
int socheckuid(struct socket *so, uid_t uid);
int sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
int sobindat(int fd, struct socket *so, struct sockaddr *nam,
struct thread *td);
int soclose(struct socket *so);
int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
int soconnectat(int fd, struct socket *so, struct sockaddr *nam,
struct thread *td);
int soconnect2(struct socket *so1, struct socket *so2);
int socreate(int dom, struct socket **aso, int type, int proto,
struct ucred *cred, struct thread *td);
int sodisconnect(struct socket *so);
struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
void sofree(struct socket *so);
void sohasoutofband(struct socket *so);
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so);
struct socket *
sonewconn(struct socket *head, int connstatus);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
struct ucred *active_cred, struct thread *td);
int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
int soreceive_stream(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
int soreceive_dgram(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
int soreceive_generic(struct socket *so, struct sockaddr **paddr,
struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
int *flagsp);
int soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
void sorflush(struct socket *so);
int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags,
struct thread *td);
int sosend_dgram(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
int sosend_generic(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
int soshutdown(struct socket *so, int how);
void sotoxsocket(struct socket *so, struct xsocket *xso);
void soupcall_clear(struct socket *so, int which);
void soupcall_set(struct socket *so, int which,
int (*func)(struct socket *, void *, int), void *arg);
void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb);
int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td);
/*
* Accept filter functions (duh).
*/
int accept_filt_add(struct accept_filter *filt);
int accept_filt_del(char *name);
struct accept_filter *accept_filt_get(char *name);
#ifdef ACCEPT_FILTER_MOD
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_accf);
#endif
int accept_filt_generic_mod_event(module_t mod, int event, void *data);
#endif
#endif /* _KERNEL */
#endif /* !_SYS_SOCKETVAR_H_ */
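Illustration (not part of this header): the connection-queuing fields described above (so_incomp, so_comp, so_qlimit) are what listen(2) and accept(2) manipulate from user space; the backlog argument bounds the queues and accept() dequeues one completed connection. An ordinary POSIX example, with an arbitrary port and backlog:

```
/* Plain POSIX example: the listen() backlog bounds the accept queues
 * (so_qlimit) and accept() dequeues one completed connection (so_comp). */
#include <netinet/in.h>
#include <sys/socket.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin, peer;
	socklen_t plen = sizeof(peer);
	int lfd, cfd;

	lfd = socket(AF_INET, SOCK_STREAM, 0);
	if (lfd < 0) {
		perror("socket");
		return (1);
	}

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(8080);		/* arbitrary example port */

	if (bind(lfd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    listen(lfd, 128) < 0) {		/* 128 bounds the queues */
		perror("bind/listen");
		return (1);
	}

	cfd = accept(lfd, (struct sockaddr *)&peer, &plen);
	if (cfd >= 0) {				/* one connection dequeued */
		printf("accepted a connection\n");
		close(cfd);
	}
	close(lfd);
	return (0);
}
```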

View File

@@ -0,0 +1,83 @@
/*-
* Copyright (c) 1982, 1986, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
*
* $FreeBSD$
*/
#ifndef _SYS_SOCKTATE_H_
#define _SYS_SOCKTATE_H_
/*
* Socket state bits.
*
* Historically, these bits were all kept in the so_state field. For
* locking reasons, they are now in multiple fields, as they are
* locked differently. so_state maintains basic socket state protected
* by the socket lock. so_qstate holds information about the socket
* accept queues. Each socket buffer also has a state field holding
* information relevant to that socket buffer (can't send, rcv). Many
* fields will be read without locks to improve performance and avoid
* lock order issues. However, this approach must be used with caution.
*/
#define SS_NOFDREF 0x0001 /* no file table ref any more */
#define SS_ISCONNECTED 0x0002 /* socket connected to a peer */
#define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */
#define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */
#define SS_NBIO 0x0100 /* non-blocking ops */
#define SS_ASYNC 0x0200 /* async i/o notify */
#define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */
#define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */
/*
* Protocols can mark a socket as SS_PROTOREF to indicate that, following
* pru_detach, they still want the socket to persist, and will free it
* themselves when they are done. Protocols should only ever call sofree()
* following setting this flag in pru_detach(), and never otherwise, as
* sofree() bypasses socket reference counting.
*/
#define SS_PROTOREF 0x4000 /* strong protocol reference */
/*
* Socket state bits now stored in the socket buffer state field.
*/
#define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
#define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */
#define SBS_RCVATMARK 0x0040 /* at mark on input */
struct socket;
void soisconnected(struct socket *so);
void soisconnecting(struct socket *so);
void soisdisconnected(struct socket *so);
void soisdisconnecting(struct socket *so);
void socantrcvmore(struct socket *so);
void socantrcvmore_locked(struct socket *so);
void socantsendmore(struct socket *so);
void socantsendmore_locked(struct socket *so);
#endif /* _SYS_SOCKTATE_H_ */
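Illustration (not part of this header): from user space, shutdown(2) is what drives the SBS_CANTSENDMORE / SBS_CANTRCVMORE transitions that socantsendmore() and socantrcvmore() apply inside the stack. A small socketpair demonstration:

```
/* shutdown(2) from user space drives the SBS_CANTSENDMORE /
 * SBS_CANTRCVMORE transitions applied in-kernel by socantsendmore()
 * and socantrcvmore(). Demonstrated on a socketpair. */
#include <sys/types.h>
#include <sys/socket.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int sv[2];
	char c = 'x';

	signal(SIGPIPE, SIG_IGN);	/* let the failed write return EPIPE */
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
		perror("socketpair");
		return (1);
	}

	shutdown(sv[0], SHUT_WR);	/* sv[0] may not send any more */
	if (write(sv[0], &c, 1) < 0)
		perror("write after SHUT_WR");	/* expected: EPIPE */

	if (read(sv[1], &c, 1) == 0)	/* peer sees an orderly EOF */
		printf("peer read EOF\n");

	close(sv[0]);
	close(sv[1]);
	return (0);
}
```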

View File

@@ -29,6 +29,7 @@
#ifndef _COMPAT_SYS_SYSCTL_H
#define _COMPAT_SYS_SYSCTL_H
#include <sys/queue.h>
#include <sys/types.h>
#include <inttypes.h>

View File

@@ -0,0 +1,119 @@
/*-
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ucred.h 8.4 (Berkeley) 1/9/95
* $FreeBSD$
*/
#ifndef _SYS_UCRED_H_
#define _SYS_UCRED_H_
#include <bsm/audit.h>
struct loginclass;
#define XU_NGROUPS 16
/*
* Credentials.
*
* Please do not inspect cr_uid directly to determine superuserness. The
* priv(9) interface should be used to check for privilege.
*/
#if defined(_KERNEL) || defined(_WANT_UCRED)
struct ucred {
u_int cr_ref; /* reference count */
#define cr_startcopy cr_uid
uid_t cr_uid; /* effective user id */
uid_t cr_ruid; /* real user id */
uid_t cr_svuid; /* saved user id */
int cr_ngroups; /* number of groups */
gid_t cr_rgid; /* real group id */
gid_t cr_svgid; /* saved group id */
struct uidinfo *cr_uidinfo; /* per euid resource consumption */
struct uidinfo *cr_ruidinfo; /* per ruid resource consumption */
struct prison *cr_prison; /* jail(2) */
struct loginclass *cr_loginclass; /* login class */
u_int cr_flags; /* credential flags */
void *cr_pspare2[2]; /* general use 2 */
#define cr_endcopy cr_label
struct label *cr_label; /* MAC label */
struct auditinfo_addr cr_audit; /* Audit properties. */
gid_t *cr_groups; /* groups */
int cr_agroups; /* Available groups */
gid_t cr_smallgroups[XU_NGROUPS]; /* storage for small groups */
};
#define NOCRED ((struct ucred *)0) /* no credential available */
#define FSCRED ((struct ucred *)-1) /* filesystem credential */
#endif /* _KERNEL || _WANT_UCRED */
/*
* Flags for cr_flags.
*/
#define CRED_FLAG_CAPMODE 0x00000001 /* In capability mode. */
/*
* This is the external representation of struct ucred.
*/
struct xucred {
u_int cr_version; /* structure layout version */
uid_t cr_uid; /* effective user id */
short cr_ngroups; /* number of groups */
gid_t cr_groups[XU_NGROUPS]; /* groups */
void *_cr_unused1; /* compatibility with old ucred */
};
#define XUCRED_VERSION 0
/* This can be used for both ucred and xucred structures. */
#define cr_gid cr_groups[0]
#ifdef _KERNEL
struct proc;
struct thread;
void change_egid(struct ucred *newcred, gid_t egid);
void change_euid(struct ucred *newcred, struct uidinfo *euip);
void change_rgid(struct ucred *newcred, gid_t rgid);
void change_ruid(struct ucred *newcred, struct uidinfo *ruip);
void change_svgid(struct ucred *newcred, gid_t svgid);
void change_svuid(struct ucred *newcred, uid_t svuid);
void crcopy(struct ucred *dest, struct ucred *src);
struct ucred *crcopysafe(struct proc *p, struct ucred *cr);
struct ucred *crdup(struct ucred *cr);
void crextend(struct ucred *cr, int n);
void proc_set_cred_init(struct proc *p, struct ucred *cr);
struct ucred *proc_set_cred(struct proc *p, struct ucred *cr);
void crfree(struct ucred *cr);
struct ucred *crget(void);
struct ucred *crhold(struct ucred *cr);
void cru2x(struct ucred *cr, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int n, gid_t *groups);
int groupmember(gid_t gid, struct ucred *cred);
#endif /* _KERNEL */
#endif /* !_SYS_UCRED_H_ */
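Illustration (not part of this header): groupmember() answers whether a gid appears in a credential's group set. A user-space analogue of the same check against the calling process's own credential, using getgroups(2) rather than struct ucred:

```
/* User-space analogue of groupmember(): check whether the calling
 * process's credential includes a given group id. */
#include <sys/types.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static bool
in_group(gid_t gid)
{
	gid_t *groups;
	bool found = false;
	int i, n;

	if (gid == getegid())		/* effective gid counts as a match */
		return (true);

	n = getgroups(0, NULL);		/* how many supplementary groups? */
	if (n <= 0)
		return (false);
	groups = malloc((size_t)n * sizeof(*groups));
	if (groups == NULL)
		return (false);
	if (getgroups(n, groups) == n) {
		for (i = 0; i < n; i++) {
			if (groups[i] == gid) {
				found = true;
				break;
			}
		}
	}
	free(groups);
	return (found);
}

int
main(void)
{
	gid_t g = getgid();

	printf("member of gid %ld: %s\n", (long)g,
	    in_group(g) ? "yes" : "no");
	return (0);
}
```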

View File

@@ -0,0 +1,126 @@
/*-
* Copyright (c) 1982, 1986, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)uio.h 8.5 (Berkeley) 2/22/94
* $FreeBSD$
*/
#ifndef _SYS_UIO_H_
#define _SYS_UIO_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
#include <sys/_iovec.h>
#ifndef _SSIZE_T_DECLARED
typedef __ssize_t ssize_t;
#define _SSIZE_T_DECLARED
#endif
#ifndef _OFF_T_DECLARED
typedef __off_t off_t;
#define _OFF_T_DECLARED
#endif
#if __BSD_VISIBLE
enum uio_rw { UIO_READ, UIO_WRITE };
/* Segment flag values. */
enum uio_seg {
UIO_USERSPACE, /* from user data space */
UIO_SYSSPACE, /* from system space */
UIO_NOCOPY /* don't copy, already in object */
};
#endif
#ifdef _KERNEL
struct uio {
struct iovec *uio_iov; /* scatter/gather list */
int uio_iovcnt; /* length of scatter/gather list */
off_t uio_offset; /* offset in target object */
ssize_t uio_resid; /* remaining bytes to process */
enum uio_seg uio_segflg; /* address space */
enum uio_rw uio_rw; /* operation */
struct thread *uio_td; /* owner */
};
/*
* Limits
*
* N.B.: UIO_MAXIOV must be no less than IOV_MAX from <sys/syslimits.h>
* which in turn must be no less than _XOPEN_IOV_MAX from <limits.h>. If
* we ever make this tunable (probably pointless), then IOV_MAX should be
* removed from <sys/syslimits.h> and applications would be expected to use
* sysconf(3) to find out the correct value, or else assume the worst
* (_XOPEN_IOV_MAX). Perhaps UIO_MAXIOV should be simply defined as
* IOV_MAX.
*/
#define UIO_MAXIOV 1024 /* max 1K of iov's */
struct vm_object;
struct vm_page;
struct bus_dma_segment;
struct uio *cloneuio(struct uio *uiop);
int copyinfrom(const void * __restrict src, void * __restrict dst,
size_t len, int seg);
int copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov,
int error);
int copyinstrfrom(const void * __restrict src, void * __restrict dst,
size_t len, size_t * __restrict copied, int seg);
int copyinuio(const struct iovec *iovp, u_int iovcnt, struct uio **uiop);
int copyout_map(struct thread *td, vm_offset_t *addr, size_t sz);
int copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz);
int physcopyin(void *src, vm_paddr_t dst, size_t len);
int physcopyout(vm_paddr_t src, void *dst, size_t len);
int physcopyin_vlist(struct bus_dma_segment *src, off_t offset,
vm_paddr_t dst, size_t len);
int physcopyout_vlist(vm_paddr_t src, struct bus_dma_segment *dst,
off_t offset, size_t len);
int uiomove(void *cp, int n, struct uio *uio);
int uiomove_frombuf(void *buf, int buflen, struct uio *uio);
int uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, int n,
struct uio *uio);
int uiomove_nofault(void *cp, int n, struct uio *uio);
int uiomove_object(struct vm_object *obj, off_t obj_size, struct uio *uio);
#else /* !_KERNEL */
__BEGIN_DECLS
ssize_t readv(int, const struct iovec *, int);
ssize_t writev(int, const struct iovec *, int);
#if __BSD_VISIBLE
ssize_t preadv(int, const struct iovec *, int, off_t);
ssize_t pwritev(int, const struct iovec *, int, off_t);
#endif
__END_DECLS
#endif /* _KERNEL */
#endif /* !_SYS_UIO_H_ */
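Illustration (not part of this header): readv() and writev(), declared above for the non-kernel case, are the user-visible face of the iovec scatter/gather lists that struct uio carries through the kernel. A self-contained writev() example gathering two buffers into one call:

```
/* Gather two separate buffers into a single writev() call. */
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char hdr[] = "header: ";
	char body[] = "payload\n";
	struct iovec iov[2];
	ssize_t n;

	iov[0].iov_base = hdr;
	iov[0].iov_len = strlen(hdr);
	iov[1].iov_base = body;
	iov[1].iov_len = strlen(body);

	/* One system call writes both pieces, in order, to stdout. */
	n = writev(STDOUT_FILENO, iov, 2);
	if (n < 0)
		perror("writev");
	else
		fprintf(stderr, "wrote %zd bytes\n", n);
	return (0);
}
```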

View File

@@ -0,0 +1,73 @@
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)un.h 8.3 (Berkeley) 2/19/95
* $FreeBSD$
*/
#ifndef _SYS_UN_H_
#define _SYS_UN_H_
#include <sys/cdefs.h>
#include <sys/_types.h>
#ifndef _SA_FAMILY_T_DECLARED
typedef __sa_family_t sa_family_t;
#define _SA_FAMILY_T_DECLARED
#endif
/*
* Definitions for UNIX IPC domain.
*/
struct sockaddr_un {
unsigned char sun_len; /* sockaddr len including null */
sa_family_t sun_family; /* AF_UNIX */
char sun_path[104]; /* path name (gag) */
};
#if __BSD_VISIBLE
/* Socket options. */
#define LOCAL_PEERCRED 1 /* retrieve peer credentials */
#define LOCAL_CREDS 2 /* pass credentials to receiver */
#define LOCAL_CONNWAIT 4 /* connects block until accepted */
/* Start of reserved space for third-party socket options. */
#define LOCAL_VENDOR SO_VENDOR
#ifndef _KERNEL
/* actual length of an initialized sockaddr_un */
#define SUN_LEN(su) \
(sizeof(*(su)) - sizeof((su)->sun_path) + strlen((su)->sun_path))
#endif /* !_KERNEL */
#endif /* __BSD_VISIBLE */
#endif /* !_SYS_UN_H_ */
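Illustration (not part of this header): SUN_LEN(), visible above under __BSD_VISIBLE, sizes a sockaddr_un that has actually been filled in (header fields plus the used portion of sun_path). A typical use when binding a UNIX-domain socket; the path is arbitrary:

```
/* Bind a UNIX-domain socket; SUN_LEN() sizes the address we filled in. */
#include <sys/socket.h>
#include <sys/un.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_un sun;
	int fd;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("socket");
		return (1);
	}

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	strncpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path) - 1);

	unlink(sun.sun_path);			/* remove a stale socket file */
	if (bind(fd, (struct sockaddr *)&sun, SUN_LEN(&sun)) < 0) {
		perror("bind");
		close(fd);
		return (1);
	}
	printf("bound to %s\n", sun.sun_path);
	close(fd);
	return (0);
}
```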

View File

@@ -0,0 +1,149 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
* $FreeBSD$
*/
#ifndef _SYS_UNPCB_H_
#define _SYS_UNPCB_H_
#include <sys/queue.h>
#include <sys/ucred.h>
/*
* Protocol control block for an active
* instance of a UNIX internal protocol.
*
* A socket may be associated with a vnode in the
* filesystem. If so, the unp_vnode pointer holds
* a reference count to this vnode, which should be irele'd
* when the socket goes away.
*
* A socket may be connected to another socket, in which
* case the control block of the socket to which it is connected
* is given by unp_conn.
*
* A socket may be referenced by a number of sockets (e.g. several
* sockets may be connected to a datagram socket.) These sockets
* are in a linked list starting with unp_refs, linked through
* unp_nextref and null-terminated. Note that a socket may be referenced
* by a number of other sockets and may also reference a socket (not
* necessarily one which is referencing it). This generates
* the need for unp_refs and unp_nextref to be separate fields.
*
* Stream sockets keep copies of receive sockbuf sb_cc and sb_mbcnt
* so that changes in the sockbuf may be computed to modify
* back pressure on the sender accordingly.
*/
typedef u_quad_t unp_gen_t;
LIST_HEAD(unp_head, unpcb);
struct unpcb {
LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */
struct socket *unp_socket; /* pointer back to socket */
struct file *unp_file; /* back-pointer to file for gc. */
struct vnode *unp_vnode; /* if associated with file */
ino_t unp_ino; /* fake inode number */
struct unpcb *unp_conn; /* control block of connected socket */
struct unp_head unp_refs; /* referencing socket linked list */
LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
struct sockaddr_un *unp_addr; /* bound address of socket */
int reserved1;
int reserved2;
unp_gen_t unp_gencnt; /* generation count of this instance */
short unp_flags; /* flags */
short unp_gcflag; /* Garbage collector flags. */
struct xucred unp_peercred; /* peer credentials, if applicable */
u_int unp_refcount;
u_int unp_msgcount; /* references from message queue */
struct mtx unp_mtx; /* mutex */
};
/*
* Flags in unp_flags.
*
* UNP_HAVEPC - indicates that the unp_peercred member is filled in
* and is really the credentials of the connected peer. This is used
* to determine whether the contents should be sent to the user or
* not.
*
* UNP_HAVEPCCACHED - indicates that the unp_peercred member is filled
* in, but does *not* contain the credentials of the connected peer
* (there may not even be a peer). This is set in unp_listen() when
* it fills in unp_peercred for later consumption by unp_connect().
*/
#define UNP_HAVEPC 0x001
#define UNP_HAVEPCCACHED 0x002
#define UNP_WANTCRED 0x004 /* credentials wanted */
#define UNP_CONNWAIT 0x008 /* connect blocks until accepted */
#define UNPGC_REF 0x1 /* unpcb has external ref. */
#define UNPGC_DEAD 0x2 /* unpcb might be dead. */
#define UNPGC_SCANNED 0x4 /* Has been scanned. */
#define UNPGC_IGNORE_RIGHTS 0x8 /* Attached rights are freed */
/*
* These flags are used to handle non-atomicity in connect() and bind()
* operations on a socket: in particular, to avoid races between multiple
* threads or processes operating simultaneously on the same socket.
*/
#define UNP_CONNECTING 0x010 /* Currently connecting. */
#define UNP_BINDING 0x020 /* Currently binding. */
#define sotounpcb(so) ((struct unpcb *)((so)->so_pcb))
/* Hack alert -- this structure depends on <sys/socketvar.h>. */
#ifdef _SYS_SOCKETVAR_H_
struct xunpcb {
size_t xu_len; /* length of this structure */
struct unpcb *xu_unpp; /* to help netstat, fstat */
struct unpcb xu_unp; /* our information */
union {
struct sockaddr_un xuu_addr; /* our bound address */
char xu_dummy1[256];
} xu_au;
#define xu_addr xu_au.xuu_addr
union {
struct sockaddr_un xuu_caddr; /* their bound address */
char xu_dummy2[256];
} xu_cau;
#define xu_caddr xu_cau.xuu_caddr
struct xsocket xu_socket;
u_quad_t xu_alignment_hack;
};
struct xunpgen {
size_t xug_len;
u_int xug_count;
unp_gen_t xug_gen;
so_gen_t xug_sogen;
};
#endif /* _SYS_SOCKETVAR_H_ */
#endif /* _SYS_UNPCB_H_ */

View File

@@ -0,0 +1,569 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1991, 1993
* The Regents of the University of California.
* Copyright (c) 2007 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)user.h 8.2 (Berkeley) 9/23/93
* $FreeBSD$
*/
#ifndef _SYS_USER_H_
#define _SYS_USER_H_
//#include <machine/pcb.h>
#ifndef _KERNEL
/* stuff that *used* to be included by user.h, or is now needed */
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
//#include <sys/proc.h>
//#include <vm/vm.h> /* XXX */
//#include <vm/vm_param.h> /* XXX */
//#include <vm/pmap.h> /* XXX */
//#include <vm/vm_map.h> /* XXX */
#endif /* !_KERNEL */
#ifndef _SYS_RESOURCEVAR_H_
#include <sys/resourcevar.h>
#endif
//#ifndef _SYS_SIGNALVAR_H_
//#include <sys/signalvar.h>
//#endif
#ifndef _SYS_SOCKET_VAR_H_
#include <sys/socket.h>
#endif
#include <sys/caprights.h>
#include <sys/priority.h>
/*
* KERN_PROC subtype ops return arrays of selected proc structure entries:
*
* This struct includes several arrays of spare space, with different arrays
* for different standard C-types. When adding new variables to this struct,
* the space for byte-aligned data should be taken from the ki_sparestring,
* pointers from ki_spareptrs, word-aligned data from ki_spareints, and
* doubleword-aligned data from ki_sparelongs. Make sure the space for new
* variables come from the array which matches the size and alignment of
* those variables on ALL hardware platforms, and then adjust the appropriate
* KI_NSPARE_* value(s) to match.
*
* Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all
* platforms after you have added new variables. Note that if you change
* the value of KINFO_PROC_SIZE, then many userland programs will stop
* working until they are recompiled!
*
* Once you have added the new field, you will need to add code to initialize
* it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
* function kvm_proclist in lib/libkvm/kvm_proc.c .
*/
#define KI_NSPARE_INT 4
#define KI_NSPARE_LONG 12
#define KI_NSPARE_PTR 6
//#ifndef _KERNEL
//#ifndef KINFO_PROC_SIZE
//#error "Unknown architecture"
//#endif
//#endif /* !_KERNEL */
#define WMESGLEN 8 /* size of returned wchan message */
#define LOCKNAMELEN 8 /* size of returned lock name */
#define TDNAMLEN 16 /* size of returned thread name */
#define COMMLEN 19 /* size of returned ki_comm name */
#define KI_EMULNAMELEN 16 /* size of returned ki_emul */
#define KI_NGROUPS 16 /* number of groups in ki_groups */
#define LOGNAMELEN 17 /* size of returned ki_login */
#define LOGINCLASSLEN 17 /* size of returned ki_loginclass */
#ifndef BURN_BRIDGES
#define OCOMMLEN TDNAMLEN
#define ki_ocomm ki_tdname
#endif
/* Flags for the process credential. */
#define KI_CRF_CAPABILITY_MODE 0x00000001
/*
* Steal a bit from ki_cr_flags to indicate that the cred had more than
* KI_NGROUPS groups.
*/
#define KI_CRF_GRP_OVERFLOW 0x80000000
struct kinfo_proc {
int ki_structsize; /* size of this structure */
int ki_layout; /* reserved: layout identifier */
struct pargs *ki_args; /* address of command arguments */
struct proc *ki_paddr; /* address of proc */
struct user *ki_addr; /* kernel virtual addr of u-area */
struct vnode *ki_tracep; /* pointer to trace file */
struct vnode *ki_textvp; /* pointer to executable file */
struct filedesc *ki_fd; /* pointer to open file info */
struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */
void *ki_wchan; /* sleep address */
pid_t ki_pid; /* Process identifier */
pid_t ki_ppid; /* parent process id */
pid_t ki_pgid; /* process group id */
pid_t ki_tpgid; /* tty process group id */
pid_t ki_sid; /* Process session ID */
pid_t ki_tsid; /* Terminal session ID */
short ki_jobc; /* job control counter */
short ki_spare_short1; /* unused (just here for alignment) */
dev_t ki_tdev; /* controlling tty dev */
sigset_t ki_siglist; /* Signals arrived but not delivered */
sigset_t ki_sigmask; /* Current signal mask */
sigset_t ki_sigignore; /* Signals being ignored */
sigset_t ki_sigcatch; /* Signals being caught by user */
uid_t ki_uid; /* effective user id */
uid_t ki_ruid; /* Real user id */
uid_t ki_svuid; /* Saved effective user id */
gid_t ki_rgid; /* Real group id */
gid_t ki_svgid; /* Saved effective group id */
short ki_ngroups; /* number of groups */
short ki_spare_short2; /* unused (just here for alignment) */
gid_t ki_groups[KI_NGROUPS]; /* groups */
vm_size_t ki_size; /* virtual size */
segsz_t ki_rssize; /* current resident set size in pages */
segsz_t ki_swrss; /* resident set size before last swap */
segsz_t ki_tsize; /* text size (pages) XXX */
segsz_t ki_dsize; /* data size (pages) XXX */
segsz_t ki_ssize; /* stack size (pages) */
u_short ki_xstat; /* Exit status for wait & stop signal */
u_short ki_acflag; /* Accounting flags */
fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */
u_int ki_estcpu; /* Time averaged value of ki_cpticks */
u_int ki_slptime; /* Time since last blocked */
u_int ki_swtime; /* Time swapped in or out */
u_int ki_cow; /* number of copy-on-write faults */
u_int64_t ki_runtime; /* Real time in microsec */
struct timeval ki_start; /* starting time */
struct timeval ki_childtime; /* time used by process children */
long ki_flag; /* P_* flags */
long ki_kiflag; /* KI_* flags (below) */
int ki_traceflag; /* Kernel trace points */
char ki_stat; /* S* process status */
signed char ki_nice; /* Process "nice" value */
char ki_lock; /* Process lock (prevent swap) count */
char ki_rqindex; /* Run queue index */
u_char ki_oncpu_old; /* Which cpu we are on (legacy) */
u_char ki_lastcpu_old; /* Last cpu we were on (legacy) */
char ki_tdname[TDNAMLEN+1]; /* thread name */
char ki_wmesg[WMESGLEN+1]; /* wchan message */
char ki_login[LOGNAMELEN+1]; /* setlogin name */
char ki_lockname[LOCKNAMELEN+1]; /* lock name */
char ki_comm[COMMLEN+1]; /* command name */
char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */
char ki_loginclass[LOGINCLASSLEN+1]; /* login class */
/*
* When adding new variables, take space for char-strings from the
* front of ki_sparestrings, and ints from the end of ki_spareints.
* That way the spare room from both arrays will remain contiguous.
*/
char ki_sparestrings[50]; /* spare string space */
int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */
int ki_oncpu; /* Which cpu we are on */
int ki_lastcpu; /* Last cpu we were on */
int ki_tracer; /* Pid of tracing process */
int ki_flag2; /* P2_* flags */
int ki_fibnum; /* Default FIB number */
u_int ki_cr_flags; /* Credential flags */
int ki_jid; /* Process jail ID */
int ki_numthreads; /* XXXKSE number of threads in total */
lwpid_t ki_tid; /* XXXKSE thread id */
struct priority ki_pri; /* process priority */
struct rusage ki_rusage; /* process rusage statistics */
/* XXX - most fields in ki_rusage_ch are not (yet) filled in */
struct rusage ki_rusage_ch; /* rusage of children processes */
struct pcb *ki_pcb; /* kernel virtual addr of pcb */
void *ki_kstack; /* kernel virtual addr of stack */
void *ki_udata; /* User convenience pointer */
struct thread *ki_tdaddr; /* address of thread */
/*
* When adding new variables, take space for pointers from the
* front of ki_spareptrs, and longs from the end of ki_sparelongs.
* That way the spare room from both arrays will remain contiguous.
*/
void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */
long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */
long ki_sflag; /* PS_* flags */
long ki_tdflags; /* XXXKSE kthread flag */
};
void fill_kinfo_proc(struct proc *, struct kinfo_proc *);
/* XXX - the following two defines are temporary */
#define ki_childstime ki_rusage_ch.ru_stime
#define ki_childutime ki_rusage_ch.ru_utime
/*
* Legacy PS_ flag. This moved to p_flag but is maintained for
* compatibility.
*/
#define PS_INMEM 0x00001 /* Loaded into memory. */
/* ki_sessflag values */
#define KI_CTTY 0x00000001 /* controlling tty vnode active */
#define KI_SLEADER 0x00000002 /* session leader */
#define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */
/*
* This used to be the per-process structure containing data that
* isn't needed in core when the process is swapped out, but now it
* remains only for the benefit of a.out core dumps.
*/
struct user {
struct pstats u_stats; /* *p_stats */
struct kinfo_proc u_kproc; /* eproc */
};
/*
* The KERN_PROC_FILE sysctl allows a process to dump the file descriptor
* array of another process.
*/
#define KF_ATTR_VALID 0x0001
#define KF_TYPE_NONE 0
#define KF_TYPE_VNODE 1
#define KF_TYPE_SOCKET 2
#define KF_TYPE_PIPE 3
#define KF_TYPE_FIFO 4
#define KF_TYPE_KQUEUE 5
#define KF_TYPE_CRYPTO 6
#define KF_TYPE_MQUEUE 7
#define KF_TYPE_SHM 8
#define KF_TYPE_SEM 9
#define KF_TYPE_PTS 10
#define KF_TYPE_PROCDESC 11
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
#define KF_VTYPE_VREG 1
#define KF_VTYPE_VDIR 2
#define KF_VTYPE_VBLK 3
#define KF_VTYPE_VCHR 4
#define KF_VTYPE_VLNK 5
#define KF_VTYPE_VSOCK 6
#define KF_VTYPE_VFIFO 7
#define KF_VTYPE_VBAD 8
#define KF_VTYPE_UNKNOWN 255
#define KF_FD_TYPE_CWD -1 /* Current working directory */
#define KF_FD_TYPE_ROOT -2 /* Root directory */
#define KF_FD_TYPE_JAIL -3 /* Jail directory */
#define KF_FD_TYPE_TRACE -4 /* Ktrace vnode */
#define KF_FD_TYPE_TEXT -5 /* Text vnode */
#define KF_FD_TYPE_CTTY -6 /* Controlling terminal */
#define KF_FLAG_READ 0x00000001
#define KF_FLAG_WRITE 0x00000002
#define KF_FLAG_APPEND 0x00000004
#define KF_FLAG_ASYNC 0x00000008
#define KF_FLAG_FSYNC 0x00000010
#define KF_FLAG_NONBLOCK 0x00000020
#define KF_FLAG_DIRECT 0x00000040
#define KF_FLAG_HASLOCK 0x00000080
#define KF_FLAG_SHLOCK 0x00000100
#define KF_FLAG_EXLOCK 0x00000200
#define KF_FLAG_NOFOLLOW 0x00000400
#define KF_FLAG_CREAT 0x00000800
#define KF_FLAG_TRUNC 0x00001000
#define KF_FLAG_EXCL 0x00002000
#define KF_FLAG_EXEC 0x00004000
/*
* Old format. Has variable hidden padding due to alignment.
* This is a compatibility hack for pre-built 7.1 packages.
*/
#if defined(__amd64__)
#define KINFO_OFILE_SIZE 1328
#endif
#if defined(__i386__)
#define KINFO_OFILE_SIZE 1324
#endif
struct kinfo_ofile {
int kf_structsize; /* Size of kinfo_file. */
int kf_type; /* Descriptor type. */
int kf_fd; /* Array index. */
int kf_ref_count; /* Reference count. */
int kf_flags; /* Flags. */
/* XXX Hidden alignment padding here on amd64 */
off_t kf_offset; /* Seek location. */
int kf_vnode_type; /* Vnode type. */
int kf_sock_domain; /* Socket domain. */
int kf_sock_type; /* Socket type. */
int kf_sock_protocol; /* Socket protocol. */
char kf_path[PATH_MAX]; /* Path to file, if any. */
struct sockaddr_storage kf_sa_local; /* Socket address. */
struct sockaddr_storage kf_sa_peer; /* Peer address. */
};
#if defined(__amd64__) || defined(__i386__)
/*
* This size should never be changed. If you really need to, you must provide
* backward ABI compatibility by allocating a new sysctl MIB that will return
* the new structure. The current structure has to be returned by the current
* sysctl MIB. See how it is done for the kinfo_ofile structure.
*/
#define KINFO_FILE_SIZE 1392
#endif
struct kinfo_file {
int kf_structsize; /* Variable size of record. */
int kf_type; /* Descriptor type. */
int kf_fd; /* Array index. */
int kf_ref_count; /* Reference count. */
int kf_flags; /* Flags. */
int kf_pad0; /* Round to 64 bit alignment. */
int64_t kf_offset; /* Seek location. */
int kf_vnode_type; /* Vnode type. */
int kf_sock_domain; /* Socket domain. */
int kf_sock_type; /* Socket type. */
int kf_sock_protocol; /* Socket protocol. */
struct sockaddr_storage kf_sa_local; /* Socket address. */
struct sockaddr_storage kf_sa_peer; /* Peer address. */
union {
struct {
/* Address of so_pcb. */
uint64_t kf_sock_pcb;
/* Address of inp_ppcb. */
uint64_t kf_sock_inpcb;
/* Address of unp_conn. */
uint64_t kf_sock_unpconn;
/* Send buffer state. */
uint16_t kf_sock_snd_sb_state;
/* Receive buffer state. */
uint16_t kf_sock_rcv_sb_state;
/* Round to 64 bit alignment. */
uint32_t kf_sock_pad0;
} kf_sock;
struct {
/* Global file id. */
uint64_t kf_file_fileid;
/* File size. */
uint64_t kf_file_size;
/* Vnode filesystem id. */
uint32_t kf_file_fsid;
/* File device. */
uint32_t kf_file_rdev;
/* File mode. */
uint16_t kf_file_mode;
/* Round to 64 bit alignment. */
uint16_t kf_file_pad0;
uint32_t kf_file_pad1;
} kf_file;
struct {
uint32_t kf_sem_value;
uint16_t kf_sem_mode;
} kf_sem;
struct {
uint64_t kf_pipe_addr;
uint64_t kf_pipe_peer;
uint32_t kf_pipe_buffer_cnt;
/* Round to 64 bit alignment. */
uint32_t kf_pipe_pad0[3];
} kf_pipe;
struct {
uint32_t kf_pts_dev;
/* Round to 64 bit alignment. */
uint32_t kf_pts_pad0[7];
} kf_pts;
struct {
pid_t kf_pid;
} kf_proc;
} kf_un;
uint16_t kf_status; /* Status flags. */
uint16_t kf_pad1; /* Round to 32 bit alignment. */
int _kf_ispare0; /* Space for more stuff. */
cap_rights_t kf_cap_rights; /* Capability rights. */
uint64_t _kf_cap_spare; /* Space for future cap_rights_t. */
/* Truncated before copyout in sysctl */
char kf_path[PATH_MAX]; /* Path to file, if any. */
};
/*
* The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
* another process as a series of entries.
*/
#define KVME_TYPE_NONE 0
#define KVME_TYPE_DEFAULT 1
#define KVME_TYPE_VNODE 2
#define KVME_TYPE_SWAP 3
#define KVME_TYPE_DEVICE 4
#define KVME_TYPE_PHYS 5
#define KVME_TYPE_DEAD 6
#define KVME_TYPE_SG 7
#define KVME_TYPE_MGTDEVICE 8
#define KVME_TYPE_UNKNOWN 255
#define KVME_PROT_READ 0x00000001
#define KVME_PROT_WRITE 0x00000002
#define KVME_PROT_EXEC 0x00000004
#define KVME_FLAG_COW 0x00000001
#define KVME_FLAG_NEEDS_COPY 0x00000002
#define KVME_FLAG_NOCOREDUMP 0x00000004
#define KVME_FLAG_SUPER 0x00000008
#define KVME_FLAG_GROWS_UP 0x00000010
#define KVME_FLAG_GROWS_DOWN 0x00000020
#if defined(__amd64__)
#define KINFO_OVMENTRY_SIZE 1168
#endif
#if defined(__i386__)
#define KINFO_OVMENTRY_SIZE 1128
#endif
struct kinfo_ovmentry {
int kve_structsize; /* Size of kinfo_vmmapentry. */
int kve_type; /* Type of map entry. */
void *kve_start; /* Starting address. */
void *kve_end; /* Finishing address. */
int kve_flags; /* Flags on map entry. */
int kve_resident; /* Number of resident pages. */
int kve_private_resident; /* Number of private pages. */
int kve_protection; /* Protection bitmask. */
int kve_ref_count; /* VM obj ref count. */
int kve_shadow_count; /* VM obj shadow count. */
char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
void *_kve_pspare[8]; /* Space for more stuff. */
off_t kve_offset; /* Mapping offset in object */
uint64_t kve_fileid; /* inode number if vnode */
dev_t kve_fsid; /* dev_t of vnode location */
int _kve_ispare[3]; /* Space for more stuff. */
};
#if defined(__amd64__) || defined(__i386__)
#define KINFO_VMENTRY_SIZE 1160
#endif
struct kinfo_vmentry {
int kve_structsize; /* Variable size of record. */
int kve_type; /* Type of map entry. */
uint64_t kve_start; /* Starting address. */
uint64_t kve_end; /* Finishing address. */
uint64_t kve_offset; /* Mapping offset in object */
uint64_t kve_vn_fileid; /* inode number if vnode */
uint32_t kve_vn_fsid; /* dev_t of vnode location */
int kve_flags; /* Flags on map entry. */
int kve_resident; /* Number of resident pages. */
int kve_private_resident; /* Number of private pages. */
int kve_protection; /* Protection bitmask. */
int kve_ref_count; /* VM obj ref count. */
int kve_shadow_count; /* VM obj shadow count. */
int kve_vn_type; /* Vnode type. */
uint64_t kve_vn_size; /* File size. */
uint32_t kve_vn_rdev; /* Device id if device. */
uint16_t kve_vn_mode; /* File mode. */
uint16_t kve_status; /* Status flags. */
int _kve_ispare[12]; /* Space for more stuff. */
/* Truncated before copyout in sysctl */
char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
};
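/*
 * Usage sketch (illustrative only, not part of this header): a userland
 * caller can walk the variable-size records returned by the
 * kern.proc.vmmap sysctl by advancing kve_structsize bytes per record;
 * kinfo_getvmmap(3) in libutil wraps the same pattern.  Error handling
 * is minimal here, and the buffer slack factor is a common convention,
 * not a requirement.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_vmmap(pid_t pid)
{
    int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, (int)pid };
    size_t len = 0;
    char *buf, *p;

    if (sysctl(mib, 4, NULL, &len, NULL, 0) < 0)
        return;
    len = len * 4 / 3;      /* slack in case the map grows between calls */
    if ((buf = malloc(len)) == NULL)
        return;
    if (sysctl(mib, 4, buf, &len, NULL, 0) < 0) {
        free(buf);
        return;
    }
    for (p = buf; p < buf + len;) {
        struct kinfo_vmentry *kve = (struct kinfo_vmentry *)(void *)p;

        if (kve->kve_structsize == 0)
            break;
        printf("0x%016jx-0x%016jx %s\n", (uintmax_t)kve->kve_start,
            (uintmax_t)kve->kve_end, kve->kve_path);
        p += kve->kve_structsize;   /* records are variable size */
    }
    free(buf);
}
#endif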
/*
* The "vm.objects" sysctl provides a list of all VM objects in the system
* via an array of these entries.
*/
struct kinfo_vmobject {
int kvo_structsize; /* Variable size of record. */
int kvo_type; /* Object type: KVME_TYPE_*. */
uint64_t kvo_size; /* Object size in pages. */
uint64_t kvo_vn_fileid; /* inode number if vnode. */
uint32_t kvo_vn_fsid; /* dev_t of vnode location. */
int kvo_ref_count; /* Reference count. */
int kvo_shadow_count; /* Shadow count. */
int kvo_memattr; /* Memory attribute. */
uint64_t kvo_resident; /* Number of resident pages. */
uint64_t kvo_active; /* Number of active pages. */
uint64_t kvo_inactive; /* Number of inactive pages. */
uint64_t _kvo_qspare[8];
uint32_t _kvo_ispare[8];
char kvo_path[PATH_MAX]; /* Pathname, if any. */
};
/*
* The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
* another process as a series of entries. Each stack is represented by a
* series of symbol names and offsets as generated by stack_sbuf_print(9).
*/
#define KKST_MAXLEN 1024
#define KKST_STATE_STACKOK 0 /* Stack is valid. */
#define KKST_STATE_SWAPPED 1 /* Stack swapped out. */
#define KKST_STATE_RUNNING 2 /* Stack ephemeral. */
#if defined(__amd64__) || defined(__i386__)
#define KINFO_KSTACK_SIZE 1096
#endif
struct kinfo_kstack {
lwpid_t kkst_tid; /* ID of thread. */
int kkst_state; /* Validity of stack. */
char kkst_trace[KKST_MAXLEN]; /* String representing stack. */
int _kkst_ispare[16]; /* Space for more stuff. */
};
struct kinfo_sigtramp {
void *ksigtramp_start;
void *ksigtramp_end;
void *ksigtramp_spare[4];
};
#ifdef _KERNEL
/* Flags for kern_proc_out function. */
#define KERN_PROC_NOTHREADS 0x1
#define KERN_PROC_MASK32 0x2
/* Flags for kern_proc_filedesc_out. */
#define KERN_FILEDESC_PACK_KINFO 0x00000001U
/* Flags for kern_proc_vmmap_out. */
#define KERN_VMMAP_PACK_KINFO 0x00000001U
struct sbuf;
/*
* The kern_proc out functions are helper functions to dump process
* miscellaneous kinfo structures to sbuf. The main consumers are KERN_PROC
* sysctls but they may also be used by other kernel subsystems.
*
* The functions manipulate the process locking state and expect the process
* to be locked on enter. On return the process is unlocked.
*/
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int vntype_to_kinfo(int vtype);
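/*
 * Usage sketch (illustrative only): dumping one process's kinfo record
 * into a caller-supplied sbuf with the helpers above.  pfind() returns
 * the process locked, which satisfies the locking contract described
 * above; kern_proc_out() unlocks it before returning.
 */
#if 0
static int
sbuf_one_proc(pid_t pid, struct sbuf *sb)
{
    struct proc *p;

    p = pfind(pid);     /* returned with the process locked */
    if (p == NULL)
        return (ESRCH);
    return (kern_proc_out(p, sb, KERN_PROC_NOTHREADS));
}
#endif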
#endif /* !_KERNEL */
#endif


@@ -0,0 +1,695 @@
/*-
* Copyright (c) 2002, 2003, 2004, 2005 Jeffrey Roberson <jeff@FreeBSD.org>
* Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
*/
/*
* uma.h - External definitions for the Universal Memory Allocator
*
*/
#ifndef _VM_UMA_H_
#define _VM_UMA_H_
#include <sys/_types.h>
#include <sys/param.h> /* For NULL */
#include <sys/malloc.h> /* For M_* */
/* User visible parameters */
#define UMA_SMALLEST_UNIT (PAGE_SIZE / 256) /* Smallest item allocated */
/* Types and type defs */
struct uma_zone;
/* Opaque type used as a handle to the zone */
typedef struct uma_zone * uma_zone_t;
void zone_drain(uma_zone_t);
/*
* Item constructor
*
* Arguments:
* item A pointer to the memory which has been allocated.
* arg The arg field passed to uma_zalloc_arg
* size The size of the allocated item
* flags See zalloc flags
*
* Returns:
* 0 on success
* errno on failure
*
* Discussion:
* The constructor is called just before the memory is returned
* to the user. It may block if necessary.
*/
typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
/*
* Item destructor
*
* Arguments:
* item A pointer to the memory which has been allocated.
* size The size of the item being destructed.
* arg Argument passed through uma_zfree_arg
*
* Returns:
* Nothing
*
* Discussion:
* The destructor may perform operations that differ from those performed
* by the initializer, but it must leave the object in the same state.
* This IS type stable storage. This is called after EVERY zfree call.
*/
typedef void (*uma_dtor)(void *mem, int size, void *arg);
/*
* Item initializer
*
* Arguments:
* item A pointer to the memory which has been allocated.
* size The size of the item being initialized.
* flags See zalloc flags
*
* Returns:
* 0 on success
* errno on failure
*
* Discussion:
* The initializer is called when the memory is cached in the uma zone.
* The initializer and the destructor should leave the object in the same
* state.
*/
typedef int (*uma_init)(void *mem, int size, int flags);
/*
* Item discard function
*
* Arguments:
* item A pointer to memory which has been 'freed' but has not left the
* zone's cache.
* size The size of the item being discarded.
*
* Returns:
* Nothing
*
* Discussion:
* This routine is called when memory leaves a zone and is returned to the
* system for other uses. It is the counter-part to the init function.
*/
typedef void (*uma_fini)(void *mem, int size);
/*
* Import new memory into a cache zone.
*/
typedef int (*uma_import)(void *arg, void **store, int count, int flags);
/*
* Free memory from a cache zone.
*/
typedef void (*uma_release)(void *arg, void **store, int count);
/*
* What's the difference between initializing and constructing?
*
* The item is initialized when it is cached, and this is the state that the
* object should be in when returned to the allocator. The purpose of this is
* to remove some code which would otherwise be called on each allocation by
* utilizing a known, stable state. This differs from the constructor which
* will be called on EVERY allocation.
*
* For example, in the initializer you may want to initialize embedded locks,
* NULL list pointers, set up initial states, magic numbers, etc. This way if
* the object is held in the allocator and re-used it won't be necessary to
* re-initialize it.
*
* The constructor may be used to lock a data structure, link it on to lists,
* bump reference counts or total counts of outstanding structures, etc.
*
 */
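/*
 * Usage sketch (illustrative only; the "foo" structure and zone are
 * hypothetical): init/fini set up and tear down an embedded mutex once
 * per cached item, while the ctor only resets the fields that must be
 * fresh on every allocation.  Items are then obtained with
 * uma_zalloc(foo_zone, M_WAITOK) and returned with uma_zfree(foo_zone, f).
 */
#if 0
struct foo {
    struct mtx  f_lock;     /* survives while cached in the zone */
    int     f_refcount;     /* reset by the ctor on each alloc */
};

static uma_zone_t foo_zone;

static int
foo_init(void *mem, int size, int flags)
{
    struct foo *f = mem;

    mtx_init(&f->f_lock, "foo", NULL, MTX_DEF);
    return (0);
}

static void
foo_fini(void *mem, int size)
{
    struct foo *f = mem;

    mtx_destroy(&f->f_lock);
}

static int
foo_ctor(void *mem, int size, void *arg, int flags)
{
    struct foo *f = mem;

    f->f_refcount = 1;
    return (0);
}

static void
foo_zone_setup(void)
{
    foo_zone = uma_zcreate("foo", sizeof(struct foo), foo_ctor, NULL,
        foo_init, foo_fini, UMA_ALIGN_PTR, 0);
}
#endif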
/* Function proto types */
/*
* Create a new uma zone
*
* Arguments:
* name The text name of the zone for debugging and stats. This memory
* should not be freed until the zone has been deallocated.
* size The size of the object that is being created.
* ctor The constructor that is called when the object is allocated.
* dtor The destructor that is called when the object is freed.
* init An initializer that sets up the initial state of the memory.
* fini A discard function that undoes initialization done by init.
* ctor/dtor/init/fini may all be null, see notes above.
* align A bitmask that corresponds to the requested alignment
* eg 4 would be 0x3
* flags A set of parameters that control the behavior of the zone.
*
* Returns:
* A pointer to a structure which is intended to be opaque to users of
* the interface. The value may be null if the wait flag is not set.
*/
uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor,
uma_dtor dtor, uma_init uminit, uma_fini fini,
int align, uint32_t flags);
/*
* Create a secondary uma zone
*
* Arguments:
* name The text name of the zone for debugging and stats. This memory
* should not be freed until the zone has been deallocated.
* ctor The constructor that is called when the object is allocated.
* dtor The destructor that is called when the object is freed.
* zinit An initializer that sets up the initial state of the memory
* as the object passes from the Keg's slab to the Zone's cache.
* zfini A discard function that undoes initialization done by init
* as the object passes from the Zone's cache to the Keg's slab.
*
* ctor/dtor/zinit/zfini may all be null, see notes above.
* Note that the zinit and zfini specified here are NOT
* exactly the same as the init/fini specified to uma_zcreate()
* when creating a master zone. These zinit/zfini are called
* on the TRANSITION from keg to zone (and vice-versa). Once
* these are set, the primary zone may alter its init/fini
* (which are called when the object passes from VM to keg)
* using uma_zone_set_init/fini() as well as its own
* zinit/zfini (unset by default for master zone) with
* uma_zone_set_zinit/zfini() (note subtle 'z' prefix).
*
* master A reference to this zone's Master Zone (Primary Zone),
* which contains the backing Keg for the Secondary Zone
* being added.
*
* Returns:
* A pointer to a structure which is intended to be opaque to users of
* the interface. The value may be null if the wait flag is not set.
*/
uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_zone_t master);
/*
* Add a second master to a secondary zone. This provides multiple data
* backends for objects with the same size. Both masters must have
* compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are
* the only type supported.
*
* Returns:
* Error on failure, 0 on success.
*/
int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
/*
* Create cache-only zones.
*
* This allows uma's per-cpu cache facilities to handle arbitrary
* pointers. Consumers must specify the import and release functions to
* fill and destroy caches. UMA does not allocate any memory for these
* zones. The 'arg' parameter is passed to import/release and is caller
* specific.
*/
uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_import zimport,
uma_release zrelease, void *arg, int flags);
/*
* Definitions for uma_zcreate flags
*
* These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to
* overlap when adding new features. 0xf0000000 is in use by uma_int.h.
*/
#define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by
physical memory XXX Not yet */
#define UMA_ZONE_ZINIT 0x0002 /* Initialize with zeros */
#define UMA_ZONE_STATIC 0x0004 /* Statically sized zone */
#define UMA_ZONE_OFFPAGE 0x0008 /* Force the slab structure allocation
off of the real memory */
#define UMA_ZONE_MALLOC 0x0010 /* For use by malloc(9) only! */
#define UMA_ZONE_NOFREE 0x0020 /* Do not free slabs of this type! */
#define UMA_ZONE_MTXCLASS 0x0040 /* Create a new lock class */
#define UMA_ZONE_VM 0x0080 /*
* Used for internal vm datastructures
* only.
*/
#define UMA_ZONE_HASH 0x0100 /*
* Use a hash table instead of caching
* information in the vm_page.
*/
#define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */
/* 0x0400 Unused */
#define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */
#define UMA_ZONE_CACHESPREAD 0x1000 /*
* Spread memory start locations across
* all possible cache lines. May
* require many virtually contiguous
* backend pages and can fail early.
*/
#define UMA_ZONE_VTOSLAB 0x2000 /* Zone uses vtoslab for lookup. */
#define UMA_ZONE_NODUMP 0x4000 /*
* Zone's pages will not be included in
* mini-dumps.
*/
#define UMA_ZONE_PCPU 0x8000 /*
* Allocates mp_maxid + 1 slabs sized to
* sizeof(struct pcpu).
*/
/*
* These flags are shared between the keg and zone. In zones wishing to add
* new kegs these flags must be compatible. Some are determined based on
* physical parameters of the request and may not be provided by the consumer.
*/
#define UMA_ZONE_INHERIT \
(UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */
#define UMA_ALIGN_LONG (sizeof(long) - 1) /* "" long */
#define UMA_ALIGN_INT (sizeof(int) - 1) /* "" int */
#define UMA_ALIGN_SHORT (sizeof(short) - 1) /* "" short */
#define UMA_ALIGN_CHAR (sizeof(char) - 1) /* "" char */
#define UMA_ALIGN_CACHE (0 - 1) /* Cache line size align */
/*
* Destroys an empty uma zone. If the zone is not empty uma complains loudly.
*
* Arguments:
* zone The zone we want to destroy.
*
*/
void uma_zdestroy(uma_zone_t zone);
/*
* Allocates an item out of a zone
*
* Arguments:
* zone The zone we are allocating from
* arg This data is passed to the ctor function
* flags See sys/malloc.h for available flags.
*
* Returns:
* A non-null pointer to an initialized element from the zone is
* guaranteed if the wait flag is M_WAITOK. Otherwise a null pointer
* may be returned if the zone is empty or the ctor failed.
*/
void *uma_zalloc_arg(uma_zone_t zone, void *arg, int flags);
/*
* Allocates an item out of a zone without supplying an argument
*
* This is just a wrapper for uma_zalloc_arg for convenience.
*
*/
static __inline void *uma_zalloc(uma_zone_t zone, int flags);
static __inline void *
uma_zalloc(uma_zone_t zone, int flags)
{
return uma_zalloc_arg(zone, NULL, flags);
}
/*
* Frees an item back into the specified zone.
*
* Arguments:
* zone The zone the item was originally allocated out of.
* item The memory to be freed.
* arg Argument passed to the destructor
*
* Returns:
* Nothing.
*/
void uma_zfree_arg(uma_zone_t zone, void *item, void *arg);
/*
* Frees an item back to a zone without supplying an argument
*
* This is just a wrapper for uma_zfree_arg for convenience.
*
*/
static __inline void uma_zfree(uma_zone_t zone, void *item);
static __inline void
uma_zfree(uma_zone_t zone, void *item)
{
uma_zfree_arg(zone, item, NULL);
}
/*
* XXX The rest of the prototypes in this header are h0h0 magic for the VM.
* If you think you need to use it for a normal zone you're probably incorrect.
*/
/*
* Backend page supplier routines
*
* Arguments:
* zone The zone that is requesting pages.
* size The number of bytes being requested.
* pflag Flags for these memory pages, see below.
* wait Indicates our willingness to block.
*
* Returns:
* A pointer to the allocated memory or NULL on failure.
*/
typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag,
int wait);
/*
* Backend page free routines
*
* Arguments:
* item A pointer to the previously allocated pages.
* size The original size of the allocation.
* pflag The flags for the slab. See UMA_SLAB_* below.
*
* Returns:
* None
*/
typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
/*
* Sets up the uma allocator. (Called by vm_mem_init)
*
* Arguments:
* bootmem A pointer to memory used to bootstrap the system.
*
* Returns:
* Nothing
*
* Discussion:
* This memory is used for zones which allocate things before the
* backend page supplier can give us pages. It should be
* UMA_SLAB_SIZE * boot_pages bytes. (see uma_int.h)
*
*/
void uma_startup(void *bootmem, int boot_pages);
/*
* Finishes starting up the allocator. This should
* be called when kva is ready for normal allocs.
*
* Arguments:
* None
*
* Returns:
* Nothing
*
* Discussion:
* uma_startup2 is called by kmeminit() to enable use of uma for malloc.
*/
void uma_startup2(void);
/*
* Reclaims unused memory for all zones
*
* Arguments:
* None
* Returns:
* None
*
* This should only be called by the page out daemon.
*/
void uma_reclaim(void);
/*
* Sets the alignment mask to be used for all zones requesting cache
* alignment. Should be called by MD boot code prior to starting VM/UMA.
*
* Arguments:
* align The alignment mask
*
* Returns:
* Nothing
*/
void uma_set_align(int align);
/*
* Set a reserved number of items to hold for M_USE_RESERVE allocations. All
* other requests must allocate new backing pages.
*/
void uma_zone_reserve(uma_zone_t zone, int nitems);
/*
* Reserves the maximum KVA space required by the zone and configures the zone
* to use a VM_ALLOC_NOOBJ-based backend allocator.
*
* Arguments:
* zone The zone to update.
* nitems The upper limit on the number of items that can be allocated.
*
* Returns:
* 0 if KVA space can not be allocated
* 1 if successful
*
* Discussion:
* When the machine supports a direct map and the zone's items are smaller
* than a page, the zone will use the direct map instead of allocating KVA
* space.
*/
int uma_zone_reserve_kva(uma_zone_t zone, int nitems);
/*
* Sets a high limit on the number of items allowed in a zone
*
* Arguments:
* zone The zone to limit
* nitems The requested upper limit on the number of items allowed
*
* Returns:
* int The effective value of nitems after rounding up based on page size
*/
int uma_zone_set_max(uma_zone_t zone, int nitems);
/*
* Obtains the effective limit on the number of items in a zone
*
* Arguments:
* zone The zone to obtain the effective limit from
*
* Return:
* 0 No limit
* int The effective limit of the zone
*/
int uma_zone_get_max(uma_zone_t zone);
/*
* Sets a warning to be printed when limit is reached
*
* Arguments:
* zone The zone we will warn about
* warning Warning content
*
* Returns:
* Nothing
*/
void uma_zone_set_warning(uma_zone_t zone, const char *warning);
/*
* Sets a function to run when limit is reached
*
* Arguments:
* zone The zone to which this applies
* fx The function to run
*
* Returns:
* Nothing
*/
typedef void (*uma_maxaction_t)(uma_zone_t, int);
void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
/*
* Obtains the approximate current number of items allocated from a zone
*
* Arguments:
* zone The zone to obtain the current allocation count from
*
* Return:
* int The approximate current number of items allocated from the zone
*/
int uma_zone_get_cur(uma_zone_t zone);
/*
* The following two routines (uma_zone_set_init/fini)
* are used to set the backend init/fini pair which acts on an
* object as it becomes allocated and is placed in a slab within
* the specified zone's backing keg. These should probably not
* be changed once allocations have already begun, but only be set
* immediately upon zone creation.
*/
void uma_zone_set_init(uma_zone_t zone, uma_init uminit);
void uma_zone_set_fini(uma_zone_t zone, uma_fini fini);
/*
* The following two routines (uma_zone_set_zinit/zfini) are
* used to set the zinit/zfini pair which acts on an object as
* it passes from the backing Keg's slab cache to the
* specified Zone's bucket cache. These should probably not
* be changed once allocations have already begun, but only be set
* immediately upon zone creation.
*/
void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit);
void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini);
/*
* Replaces the standard backend allocator for this zone.
*
* Arguments:
* zone The zone whose backend allocator is being changed.
* allocf A pointer to the allocation function
*
* Returns:
* Nothing
*
* Discussion:
* This could be used to implement pageable allocation, or perhaps
* even DMA allocators if used in conjunction with the OFFPAGE
* zone flag.
*/
void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf);
/*
* Used for freeing memory provided by the allocf above
*
* Arguments:
* zone The zone that intends to use this free routine.
* freef The page freeing routine.
*
* Returns:
* Nothing
*/
void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
/*
* These flags are setable in the allocf and visible in the freef.
*/
#define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */
#define UMA_SLAB_KMEM 0x02 /* Slab alloced from kmem_map */
#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kernel_map */
#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */
#define UMA_SLAB_OFFP 0x10 /* Slab is managed separately */
#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */
/* 0x40 and 0x80 are available */
/*
* Used to pre-fill a zone with some number of items
*
* Arguments:
* zone The zone to fill
* itemcnt The number of items to reserve
*
* Returns:
* Nothing
*
* NOTE: This is blocking and should only be done at startup
*/
void uma_prealloc(uma_zone_t zone, int itemcnt);
/*
* Used to determine if a fixed-size zone is exhausted.
*
* Arguments:
* zone The zone to check
*
* Returns:
* Non-zero if zone is exhausted.
*/
int uma_zone_exhausted(uma_zone_t zone);
int uma_zone_exhausted_nolock(uma_zone_t zone);
/*
* Common UMA_ZONE_PCPU zones.
*/
extern uma_zone_t pcpu_zone_64;
extern uma_zone_t pcpu_zone_ptr;
/*
* Exported statistics structures to be used by user space monitoring tools.
* Statistics stream consists of a uma_stream_header, followed by a series of
* alternating uma_type_header and uma_percpu_stat structures.
*/
#define UMA_STREAM_VERSION 0x00000001
struct uma_stream_header {
uint32_t ush_version; /* Stream format version. */
uint32_t ush_maxcpus; /* Value of MAXCPU for stream. */
uint32_t ush_count; /* Number of records. */
uint32_t _ush_pad; /* Pad/reserved field. */
};
#define UTH_MAX_NAME 32
#define UTH_ZONE_SECONDARY 0x00000001
struct uma_type_header {
/*
* Static per-zone data, some extracted from the supporting keg.
*/
char uth_name[UTH_MAX_NAME];
uint32_t uth_align; /* Keg: alignment. */
uint32_t uth_size; /* Keg: requested size of item. */
uint32_t uth_rsize; /* Keg: real size of item. */
uint32_t uth_maxpages; /* Keg: maximum number of pages. */
uint32_t uth_limit; /* Keg: max items to allocate. */
/*
* Current dynamic zone/keg-derived statistics.
*/
uint32_t uth_pages; /* Keg: pages allocated. */
uint32_t uth_keg_free; /* Keg: items free. */
uint32_t uth_zone_free; /* Zone: items free. */
uint32_t uth_bucketsize; /* Zone: desired bucket size. */
uint32_t uth_zone_flags; /* Zone: flags. */
uint64_t uth_allocs; /* Zone: number of allocations. */
uint64_t uth_frees; /* Zone: number of frees. */
uint64_t uth_fails; /* Zone: number of alloc failures. */
uint64_t uth_sleeps; /* Zone: number of alloc sleeps. */
uint64_t _uth_reserved1[2]; /* Reserved. */
};
struct uma_percpu_stat {
uint64_t ups_allocs; /* Cache: number of allocations. */
uint64_t ups_frees; /* Cache: number of frees. */
uint64_t ups_cache_free; /* Cache: free items in cache. */
uint64_t _ups_reserved[5]; /* Reserved. */
};
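/*
 * Usage sketch (illustrative only): a userland monitor consumes this
 * stream as one uma_stream_header followed, for each of ush_count zones,
 * by a uma_type_header and ush_maxcpus uma_percpu_stat records.
 * Obtaining the stream via the "vm.zone_stats" sysctl is an assumption
 * here (it is what libmemstat's sysctl backend queries), not something
 * defined by this header.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

static void
walk_uma_stream(const char *buf, size_t len)
{
    const struct uma_stream_header *ushp;
    const struct uma_type_header *uthp;
    const char *p, *end;
    size_t recsz;
    uint32_t i;

    if (len < sizeof(*ushp))
        return;
    ushp = (const struct uma_stream_header *)(const void *)buf;
    if (ushp->ush_version != UMA_STREAM_VERSION)
        return;
    end = buf + len;
    recsz = sizeof(*uthp) +
        ushp->ush_maxcpus * sizeof(struct uma_percpu_stat);
    p = buf + sizeof(*ushp);
    for (i = 0; i < ushp->ush_count && p + recsz <= end; i++) {
        uthp = (const struct uma_type_header *)(const void *)p;
        printf("%.*s: %ju allocs, %ju frees\n", UTH_MAX_NAME,
            uthp->uth_name, (uintmax_t)uthp->uth_allocs,
            (uintmax_t)uthp->uth_frees);
        p += recsz;     /* step over the per-CPU records as well */
    }
}
#endif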
void uma_reclaim_wakeup(void);
void uma_reclaim_worker(void *);
#endif /* _VM_UMA_H_ */


@@ -0,0 +1,338 @@
/*-
* Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
* Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#include <sys/_task.h>
/*
* This file includes definitions, structures, prototypes, and inlines that
* should not be used outside of the actual implementation of UMA.
*/
/*
* Here's a quick description of the relationship between the objects:
*
* Kegs contain lists of slabs which are stored in either the full bin, empty
* bin, or partially allocated bin, to reduce fragmentation. They also contain
* the user supplied value for size, which is adjusted for alignment purposes
* and rsize is the result of that. The Keg also stores information for
* managing a hash of page addresses that maps pages to uma_slab_t structures
* for pages that don't have embedded uma_slab_t's.
*
* The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
* be allocated off the page from a special slab zone. The free list within a
* slab is managed with a bitmask. For item sizes that would yield more than
* 10% memory waste we potentially allocate a separate uma_slab_t if this will
* improve the number of items per slab that will fit.
*
* The only really gross cases, with regards to memory waste, are for those
* items that are just over half the page size. You can get nearly 50% waste,
* so you fall back to the memory footprint of the power of two allocator. I
* have looked at memory allocation sizes on many of the machines available to
* me, and there does not seem to be an abundance of allocations at this range
* so at this time it may not make sense to optimize for it. This can, of
* course, be solved with dynamic slab sizes.
*
* Kegs may serve multiple Zones but by far most of the time they only serve
* one. When a Zone is created, a Keg is allocated and setup for it. While
* the backing Keg stores slabs, the Zone caches Buckets of items allocated
* from the slabs. Each Zone is equipped with an init/fini and ctor/dtor
* pair, as well as with its own set of small per-CPU caches, layered above
* the Zone's general Bucket cache.
*
* The PCPU caches are protected by critical sections, and may be accessed
* safely only from their associated CPU, while the Zones backed by the same
* Keg all share a common Keg lock (to coalesce contention on the backing
* slabs). The backing Keg typically only serves one Zone but in the case of
* multiple Zones, one of the Zones is considered the Master Zone and all
* Zone-related stats from the Keg are done in the Master Zone. For an
* example of a Multi-Zone setup, refer to the Mbuf allocation code.
*/
/*
* This is the representation for a normal (non-OFFPAGE) slab.
*
* i == item
* s == slab pointer
*
* <---------------- Page (UMA_SLAB_SIZE) ------------------>
* ___________________________________________________________
* | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ___________ |
* ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header||
* ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________||
* |___________________________________________________________|
*
*
* This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE.
*
* ___________________________________________________________
* | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ |
* ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |
* ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |
* |___________________________________________________________|
* ___________ ^
* |slab header| |
* |___________|---*
*
*/
#ifndef VM_UMA_INT_H
#define VM_UMA_INT_H
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
#define UMA_SLAB_SIZE PAGE_SIZE /* How big are our slabs? */
#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */
#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */
#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */
#define UMA_BOOT_PAGES_ZONES 32 /* Multiplier for pages to reserve */
/* if uma_zone > PAGE_SIZE */
/* Max waste percentage before going to off page slab management */
#define UMA_MAX_WASTE 10
/*
* I doubt there will be many cases where this is exceeded. This is the initial
* size of the hash table for uma_slabs that are managed off page. This hash
* does expand by powers of two. Currently it doesn't get smaller.
*/
#define UMA_HASH_SIZE_INIT 32
/*
* I should investigate other hashing algorithms. This should yield a low
* number of collisions if the pages are relatively contiguous.
*/
#define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask)
#define UMA_HASH_INSERT(h, s, mem) \
SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \
(mem))], (s), us_hlink)
#define UMA_HASH_REMOVE(h, s, mem) \
SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \
(mem))], (s), uma_slab, us_hlink)
/* Hash table for freed address -> slab translation */
SLIST_HEAD(slabhead, uma_slab);
struct uma_hash {
struct slabhead *uh_slab_hash; /* Hash table for slabs */
int uh_hashsize; /* Current size of the hash table */
int uh_hashmask; /* Mask used during hashing */
};
/*
* align field or structure to cache line
*/
#define UMA_ALIGN __attribute__((__aligned__(64)))
/*
* Structures for per cpu queues.
*/
struct uma_bucket {
LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */
int16_t ub_cnt; /* Count of free items. */
int16_t ub_entries; /* Max items. */
void *ub_bucket[]; /* actual allocation storage */
};
typedef struct uma_bucket * uma_bucket_t;
struct uma_cache {
uma_bucket_t uc_freebucket; /* Bucket we're freeing to */
uma_bucket_t uc_allocbucket; /* Bucket to allocate from */
uint64_t uc_allocs; /* Count of allocations */
uint64_t uc_frees; /* Count of frees */
} UMA_ALIGN;
typedef struct uma_cache * uma_cache_t;
/*
* Keg management structure
*
* TODO: Optimize for cache line size
*
*/
struct uma_keg {
struct mtx_padalign uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */
LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */
uint32_t uk_align; /* Alignment mask */
uint32_t uk_pages; /* Total page count */
uint32_t uk_free; /* Count of items free in slabs */
uint32_t uk_reserve; /* Number of reserved items. */
uint32_t uk_size; /* Requested size of each item */
uint32_t uk_rsize; /* Real size of each item */
uint32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
uma_alloc uk_allocf; /* Allocation function */
uma_free uk_freef; /* Free routine */
u_long uk_offset; /* Next free offset from base KVA */
vm_offset_t uk_kva; /* Zone base KVA */
uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */
uint16_t uk_slabsize; /* Slab size for this keg */
uint16_t uk_pgoff; /* Offset to uma_slab struct */
uint16_t uk_ppera; /* pages per allocation from backend */
uint16_t uk_ipers; /* Items per slab */
uint32_t uk_flags; /* Internal flags */
/* Least used fields go to the last cache line. */
const char *uk_name; /* Name of creating zone. */
LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
};
typedef struct uma_keg * uma_keg_t;
/*
* Free bits per-slab.
*/
#define SLAB_SETSIZE (PAGE_SIZE / UMA_SMALLEST_UNIT)
BITSET_DEFINE(slabbits, SLAB_SETSIZE);
/*
* The slab structure manages a single contiguous allocation from backing
* store and subdivides it into individually allocatable items.
*/
struct uma_slab {
uma_keg_t us_keg; /* Keg we live in */
union {
LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */
unsigned long _us_size; /* Size of allocation */
} us_type;
SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
uint8_t *us_data; /* First item */
struct slabbits us_free; /* Free bitmask. */
#ifdef INVARIANTS
struct slabbits us_debugfree; /* Debug bitmask. */
#endif
uint16_t us_freecount; /* How many are free? */
uint8_t us_flags; /* Page flags see uma.h */
uint8_t us_pad; /* Pad to 32bits, unused. */
};
#define us_link us_type._us_link
#define us_size us_type._us_size
typedef struct uma_slab * uma_slab_t;
typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int);
struct uma_klink {
LIST_ENTRY(uma_klink) kl_link;
uma_keg_t kl_keg;
};
typedef struct uma_klink *uma_klink_t;
/*
* Zone management structure
*
* TODO: Optimize for cache line size
*
*/
struct uma_zone {
struct mtx_padalign uz_lock; /* Lock for the zone */
struct mtx_padalign *uz_lockptr;
const char *uz_name; /* Text name of the zone */
LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
LIST_HEAD(,uma_bucket) uz_buckets; /* full buckets */
LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
struct uma_klink uz_klink; /* klink for first keg. */
uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
uma_init uz_init; /* Initializer for each item */
uma_fini uz_fini; /* Finalizer for each item. */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
uint32_t uz_flags; /* Flags inherited from kegs */
uint32_t uz_size; /* Size inherited from kegs */
volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
volatile u_long uz_fails; /* Total number of alloc failures */
volatile u_long uz_frees; /* Total number of frees */
uint64_t uz_sleeps; /* Total number of alloc sleeps */
uint16_t uz_count; /* Amount of items in full bucket */
uint16_t uz_count_min; /* Minimal amount of items there */
/* The next two fields are used to print rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
struct task uz_maxaction; /* Task to run when at limit */
/*
* This HAS to be the last item because we adjust the zone size
* based on NCPU and then allocate the space for the zones.
*/
struct uma_cache uz_cpu[1]; /* Per cpu caches */
};
/*
* These flags must not overlap with the UMA_ZONE flags specified in uma.h.
*/
#define UMA_ZFLAG_MULTI 0x04000000 /* Multiple kegs in the zone. */
#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */
#define UMA_ZFLAG_INHERIT \
(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
static inline uma_keg_t
zone_first_keg(uma_zone_t zone)
{
uma_klink_t klink;
klink = LIST_FIRST(&zone->uz_kegs);
return (klink != NULL) ? klink->kl_keg : NULL;
}
#undef UMA_ALIGN
#endif /* VM_UMA_INT_H */


@@ -86,7 +86,7 @@ ioctl_va(int fd, unsigned long com, void *data, int argc, ...)
}
if (size > msg->buf_len) {
errno = EINVAL;
errno = ENOMEM;
ff_ipc_msg_free(msg);
return -1;
}
@@ -99,7 +99,7 @@ ioctl_va(int fd, unsigned long com, void *data, int argc, ...)
if (argc == 3) {
if (size + clen > msg->buf_len) {
errno = EINVAL;
errno = ENOMEM;
ff_ipc_msg_free(msg);
return -1;
}


@@ -25,6 +25,7 @@
*/
#include <string.h>
#include <rte_malloc.h>
#include "ff_ipc.h"
@@ -33,6 +34,8 @@ sysctl(int *name, unsigned namelen, void *old,
size_t *oldlenp, const void *new, size_t newlen)
{
struct ff_msg *msg, *retmsg = NULL;
char *extra_buf = NULL;
size_t total_len;
if (old != NULL && oldlenp == NULL) {
errno = EINVAL;
@@ -50,10 +53,16 @@ sysctl(int *name, unsigned namelen, void *old,
oldlen = *oldlenp;
}
if (namelen + oldlen + newlen > msg->buf_len) {
errno = EINVAL;
ff_ipc_msg_free(msg);
return -1;
total_len = namelen + oldlen + newlen;
if (total_len > msg->buf_len) {
extra_buf = rte_malloc(NULL, total_len, 0);
if (extra_buf == NULL) {
errno = ENOMEM;
ff_ipc_msg_free(msg);
return -1;
}
msg->buf_addr = extra_buf;
msg->buf_len = total_len;
}
char *buf_addr = msg->buf_addr;
@@ -97,6 +106,9 @@ sysctl(int *name, unsigned namelen, void *old,
if (ret < 0) {
errno = EPIPE;
ff_ipc_msg_free(msg);
if (extra_buf) {
rte_free(extra_buf);
}
return -1;
}
@@ -108,6 +120,9 @@ sysctl(int *name, unsigned namelen, void *old,
if (ret < 0) {
errno = EPIPE;
ff_ipc_msg_free(msg);
if (extra_buf) {
rte_free(extra_buf);
}
return -1;
}
} while (msg != retmsg);
@@ -127,6 +142,9 @@ sysctl(int *name, unsigned namelen, void *old,
}
ff_ipc_msg_free(msg);
if (extra_buf) {
rte_free(extra_buf);
}
return ret;
}

tools/lib.mk Normal file

@@ -0,0 +1,46 @@
#
# Derived from FreeBSD src/share/mk/bsd.lib.mk
#
ifdef DEBUG_FLAGS
CFLAGS+=${DEBUG_FLAGS}
CXXFLAGS+=${DEBUG_FLAGS}
endif
ifndef LIB
$(error LIB must be defined.)
endif
ifndef SRCS
SRCS= ${LIB}.c
endif
ifndef TOPDIR
$(error TOPDIR must be defined.)
endif
FF_LIB_CFLAGS:= -g -Wall -Werror -DFSTACK -std=gnu99
FF_LIB_CFLAGS+= -I${TOPDIR}/lib -I${TOPDIR}/tools/compat
FF_LIB_CFLAGS+= -include${TOPDIR}/tools/compat/compat.h
FF_LIB_CFLAGS+= -I${TOPDIR}/tools/compat/include -D__BSD_VISIBLE
CFLAGS+= ${FF_LIB_CFLAGS}
OBJS+= $(patsubst %.cc,%.o,$(patsubst %.c,%.o,${SRCS}))
LIBBASENAME=lib${LIB}
CLEANFILES+= ${LIBBASENAME}.a ${OBJS}
${LIBBASENAME}.a: ${OBJS}
rm -f $@
ar -cqs $@ ${OBJS}
${OBJS}: %.o: %.c
${CC} -c ${CFLAGS} $<
clean:
@rm -f ${CLEANFILES}
all: ${LIBBASENAME}.a

tools/libmemstat/Makefile Normal file

@@ -0,0 +1,35 @@
# $FreeBSD$
TOPDIR?=${CURDIR}/../..
include ${TOPDIR}/tools/opts.mk
PACKAGE=lib${LIB}
WARNS?= 3
LIB= memstat
SHLIB_MAJOR= 3
LIBADD+= kvm
SRCS+= memstat.c
SRCS+= memstat_all.c
SRCS+= memstat_malloc.c
SRCS+= memstat_uma.c
INCS= memstat.h
MAN= libmemstat.3
MLINKS+= libmemstat.3 memstat_mtl_alloc.3
MLINKS+= libmemstat.3 memstat_mtl_first.3
MLINKS+= libmemstat.3 memstat_mtl_next.3
MLINKS+= libmemstat.3 memstat_mtl_find.3
MLINKS+= libmemstat.3 memstat_mtl_free.3
MLINKS+= libmemstat.3 memstat_mtl_geterror.3
MLINKS+= libmemstat.3 memstat_strerror.3
MLINKS+= libmemstat.3 memstat_sysctl_all.3
MLINKS+= libmemstat.3 memstat_sysctl_malloc.3
MLINKS+= libmemstat.3 memstat_sysctl_uma.3
MLINKS+= libmemstat.3 memstat_kvm_all.3
MLINKS+= libmemstat.3 memstat_kvm_malloc.3
MLINKS+= libmemstat.3 memstat_kvm_uma.3
include ${TOPDIR}/tools/lib.mk


@@ -0,0 +1,19 @@
# $FreeBSD$
# Autogenerated - do NOT edit!
DIRDEPS = \
gnu/lib/csu \
gnu/lib/libgcc \
include \
include/xlocale \
lib/${CSU_DIR} \
lib/libc \
lib/libcompiler_rt \
lib/libkvm \
.include <dirdeps.mk>
.if ${DEP_RELDIR} == ${_DEP_RELDIR}
# local dependencies - needed for -jN in clean tree
.endif


@@ -0,0 +1,499 @@
.\" Copyright (c) 2005 Robert N. M. Watson
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 11, 2014
.Dt LIBMEMSTAT 3
.Os
.Sh NAME
.Nm libmemstat
.Nd "library interface to retrieve kernel memory allocator statistics"
.Sh LIBRARY
.Lb libmemstat
.Sh SYNOPSIS
.In sys/types.h
.In memstat.h
.Ss General Functions
.Ft "const char *"
.Fn memstat_strerror "int error"
.Ss Memory Type List Management Functions
.Ft "struct memory_type_list *"
.Fn memstat_mtl_alloc "void"
.Ft "struct memory_type *"
.Fn memstat_mtl_first "struct memory_type_list *list"
.Ft "struct memory_type *"
.Fn memstat_mtl_next "struct memory_type *mtp"
.Ft "struct memory_type *"
.Fo memstat_mtl_find
.Fa "struct memory_type_list *list" "int allocator" "const char *name"
.Fc
.Ft void
.Fn memstat_mtl_free "struct memory_type_list *list"
.Ft int
.Fn memstat_mtl_geterror "struct memory_type_list *list"
.Ss Allocator Query Functions
.Ft int
.Fn memstat_kvm_all "struct memory_type_list *list" "void *kvm_handle"
.Ft int
.Fn memstat_kvm_malloc "struct memory_type_list *list" "void *kvm_handle"
.Ft int
.Fn memstat_kvm_uma "struct memory_type_list *list" "void *kvm_handle"
.Ft int
.Fn memstat_sysctl_all "struct memory_type_list *list" "int flags"
.Ft int
.Fn memstat_sysctl_malloc "struct memory_type_list *list" "int flags"
.Ft int
.Fn memstat_sysctl_uma "struct memory_type_list *list" "int flags"
.Ss Memory Type Accessor Methods
.Ft "const char *"
.Fn memstat_get_name "const struct memory_type *mtp"
.Ft int
.Fn memstat_get_allocator "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_countlimit "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_byteslimit "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_sizemask "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_size "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_rsize "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_memalloced "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_memfreed "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_numallocs "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_numfrees "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_bytes "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_count "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_free "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_failures "const struct memory_type *mtp"
.Ft "void *"
.Fn memstat_get_caller_pointer "const struct memory_type *mtp" "int index"
.Ft void
.Fo memstat_set_caller_pointer
.Fa "struct memory_type *mtp" "int index" "void *value"
.Fc
.Ft uint64_t
.Fn memstat_get_caller_uint64 "const struct memory_type *mtp" "int index"
.Ft void
.Fo memstat_set_caller_uint64
.Fa "struct memory_type *mtp" "int index" "uint64_t value"
.Fc
.Ft uint64_t
.Fn memstat_get_zonefree "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_kegfree "const struct memory_type *mtp"
.Ft uint64_t
.Fn memstat_get_percpu_memalloced "const struct memory_type *mtp" "int cpu"
.Ft uint64_t
.Fn memstat_get_percpu_memfreed "const struct memory_type *mtp" "int cpu"
.Ft uint64_t
.Fn memstat_get_percpu_numallocs "const struct memory_type *mtp" "int cpu"
.Ft uint64_t
.Fn memstat_get_percpu_numfrees "const struct memory_type *mtp" "int cpu"
.Ft uint64_t
.Fn memstat_get_percpu_sizemask "const struct memory_type *mtp" "int cpu"
.Ft "void *"
.Fo memstat_get_percpu_caller_pointer
.Fa "const struct memory_type *mtp" "int cpu" "int index"
.Fc
.Ft void
.Fo memstat_set_percpu_caller_pointer
.Fa "struct memory_type *mtp" "int cpu" "int index" "void *value"
.Fc
.Ft uint64_t
.Fo memstat_get_percpu_caller_uint64
.Fa "const struct memory_type *mtp" "int cpu" "int index"
.Fc
.Ft void
.Fo memstat_set_percpu_caller_uint64
.Fa "struct memory_type *mtp" "int cpu" "int index" "uint64_t value"
.Fc
.Ft uint64_t
.Fn memstat_get_percpu_free "const struct memory_type *mtp" "int cpu"
.Sh DESCRIPTION
.Nm
provides an interface to retrieve kernel memory allocator statistics, for
the purposes of debugging and system monitoring, insulating applications
from implementation details of the allocators, and allowing a tool to
transparently support multiple allocators.
.Nm
supports both retrieving a single statistics snapshot and incrementally
updating statistics for long-term monitoring.
.Pp
.Nm
describes each memory type using a
.Vt "struct memory_type" ,
an opaque memory type accessed by the application using accessor functions
in the library.
.Nm
returns and updates chains of
.Vt "struct memory_type"
via a
.Vt "struct memory_type_list" ,
which will be allocated by calling
.Fn memstat_mtl_alloc ,
and freed on completion using
.Fn memstat_mtl_free .
Lists of memory types are populated via calls that query the kernel for
statistics information; currently:
.Fn memstat_kvm_all ,
.Fn memstat_kvm_malloc ,
.Fn memstat_kvm_uma ,
.Fn memstat_sysctl_all ,
.Fn memstat_sysctl_uma ,
and
.Fn memstat_sysctl_malloc .
Repeated calls will incrementally update the list of memory types, permitting
tracking over time without recreating all list state.
If an error is detected during a query call, error condition information may
be retrieved using
.Fn memstat_mtl_geterror ,
and converted to a user-readable string using
.Fn memstat_strerror .
.Pp
Freeing the list will free all memory type data in the list, and so
invalidates any outstanding pointers to entries in the list.
.Vt "struct memory_type"
entries in the list may be iterated over using
.Fn memstat_mtl_first
and
.Fn memstat_mtl_next ,
which respectively return the first entry in a list, and the next entry in a
list.
A specific entry may be looked up by allocator and name using
.Fn memstat_mtl_find ,
which will return a pointer to the first entry matching the passed
parameters.
.Pp
A series of accessor methods is provided to access fields of the structure,
including retrieving statistics and properties, as well as setting of caller
owned fields.
Direct application access to the data structure fields is not supported.
.Ss Library Vt memory_type Ss Fields
Each
.Vt "struct memory_type"
holds a description of the memory type, including its name and the allocator
it is managed by, as well as current statistics on use.
Some statistics are directly measured, others are derived from directly
measured statistics.
Certain high level statistics are present across all available allocators,
such as the number of allocation and free operations; other measurements,
such as the quantity of free items in per-CPU caches, or administrative
limit on the number of allocations, are available only for specific
allocators.
.Ss Caller Vt memory_type Ss Fields
.Vt "struct memory_type"
includes fields to allow the application to store data, in the form of
pointers and 64-bit integers, with memory types.
For example, the application author might make use of one of the caller
pointers to reference a more complex data structure tracking long-term
behavior of the memory type, or a window system object that is used to
render the state of the memory type.
General and per-CPU storage is provided with each
.Vt "struct memory_type"
in the form of an array of pointers and integers.
The array entries are accessed via the
.Fa index
argument to the get and set accessor methods.
Possible values of
.Fa index
range between
0
and
.Dv MEMSTAT_MAXCALLER .
.Pp
Caller-owned fields are initialized to
0
or
.Dv NULL
when a new
.Vt "struct memory_type"
is allocated and attached to a memory type list; these fields retain their
values across queries that update library-owned fields.
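.Pp
For example, an application might stash the previously observed
allocation count for a type in caller integer slot 0 and report the
delta on each poll.
The sketch below assumes
.Fa mtp
was obtained from a populated memory type list:
.Bd -literal -offset indent
uint64_t prev, cur;

prev = memstat_get_caller_uint64(mtp, 0);
cur = memstat_get_numallocs(mtp);
printf("%s: %ju allocations since last poll\en",
    memstat_get_name(mtp), (uintmax_t)(cur - prev));
memstat_set_caller_uint64(mtp, 0, cur);
.Ed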
.Ss Allocator Types
Currently,
.Nm
supports two kernel allocators:
.Dv ALLOCATOR_UMA
for
.Xr uma 9 ,
and
.Dv ALLOCATOR_MALLOC
for
.Xr malloc 9 .
These values may be passed to
.Fn memstat_mtl_find ,
and will be returned by
.Fn memstat_get_allocator .
Two additional constants in the allocator name space are defined:
.Dv ALLOCATOR_UNKNOWN ,
which will only be returned as a result of a library error, and
.Dv ALLOCATOR_ANY ,
which can be used to specify that returning types matching any allocator is
permissible from
.Fn memstat_mtl_find .
.Ss Access Method List
The following accessor methods are defined, of which some will be valid for
a given memory type:
.Bl -tag -width indent
.It Fn memstat_get_name
Return a pointer to the name of the memory type.
Memory for the name is owned by
.Nm
and will be valid through a call to
.Fn memstat_mtl_free .
Note that names will be unique with respect to a single allocator, but that
the same name might be used by different memory types owned by different
memory allocators.
.It Fn memstat_get_allocator
Return an integer identifier for the memory allocator that owns the memory
type.
.It Fn memstat_get_countlimit
If the memory type has an administrative limit on the number of simultaneous
allocations, return it.
.It Fn memstat_get_byteslimit
If the memory type has an administrative limit on the number of bytes of
memory that may be simultaneously allocated for the memory type, return it.
.It Fn memstat_get_sizemask
If the memory type supports variable allocation sizes, return a bitmask of
sizes allocated for the memory type.
.It Fn memstat_get_size
If the memory type supports a fixed allocation size, return that size.
.It Fn memstat_get_rsize
If the memory type supports a fixed allocation size, return real size
of an allocation.
Real size can exceed requested size due to alignment constraints or
implicit padding.
.It Fn memstat_get_memalloced
Return the total number of bytes allocated for the memory type over its
lifetime.
.It Fn memstat_get_memfreed
Return the total number of bytes freed for the memory type over its lifetime.
.It Fn memstat_get_numallocs
Return the total number of allocations for the memory type over its lifetime.
.It Fn memstat_get_numfrees
Return the total number of frees for the memory type over its lifetime.
.It Fn memstat_get_bytes
Return the current number of bytes allocated to the memory type.
.It Fn memstat_get_count
Return the current number of allocations for the memory type.
.It Fn memstat_get_free
If the memory allocator supports a cache, return the number of items in the
cache.
.It Fn memstat_get_failures
If the memory allocator and type permit allocation failures, return the
number of allocation failures measured.
.It Fn memstat_get_caller_pointer
Return a caller-owned pointer for the memory type.
.It Fn memstat_set_caller_pointer
Set a caller-owned pointer for the memory type.
.It Fn memstat_get_caller_uint64
Return a caller-owned integer for the memory type.
.It Fn memstat_set_caller_uint64
Set a caller-owned integer for the memory type.
.It Fn memstat_get_zonefree
If the memory allocator supports a multi-level allocation structure, return
the number of cached items in the zone.
These items will be in a fully constructed state available for immediate
use.
.It Fn memstat_get_kegfree
If the memory allocator supports a multi-level allocation structure, return
the number of cached items in the keg.
These items may be in a partially constructed state, and may require further
processing before they can be made available for use.
.It Fn memstat_get_percpu_memalloced
If the memory allocator supports per-CPU statistics, return the number of
bytes of memory allocated for the memory type on the CPU over its lifetime.
.It Fn memstat_get_percpu_memfreed
If the memory allocator supports per-CPU statistics, return the number of
bytes of memory freed from the memory type on the CPU over its lifetime.
.It Fn memstat_get_percpu_numallocs
If the memory allocator supports per-CPU statistics, return the number of
allocations for the memory type on the CPU over its lifetime.
.It Fn memstat_get_percpu_numfrees
If the memory allocator supports per-CPU statistics, return the number of
frees for the memory type on the CPU over its lifetime.
.It Fn memstat_get_percpu_sizemask
If the memory allocator supports variable size memory allocation and per-CPU
statistics, return the size bitmask for the memory type on the CPU.
.It Fn memstat_get_percpu_caller_pointer
Return a caller-owned per-CPU pointer for the memory type.
.It Fn memstat_set_percpu_caller_pointer
Set a caller-owned per-CPU pointer for the memory type.
.It Fn memstat_get_percpu_caller_uint64
Return a caller-owned per-CPU integer for the memory type.
.It Fn memstat_set_percpu_caller_uint64
Set a caller-owned per-CPU integer for the memory type.
.It Fn memstat_get_percpu_free
If the memory allocator supports a per-CPU cache, return the number of free
items in the per-CPU cache of the designated CPU.
.El
.Sh RETURN VALUES
.Nm
functions fall into three categories: functions returning a pointer to an
object, functions returning an integer return value, and functions
implementing accessor methods returning data from a
.Vt "struct memory_type" .
.Pp
Functions returning a pointer to an object will generally return
.Dv NULL
on failure.
.Fn memstat_mtl_alloc
will return an error value via
.Va errno ,
which will consist of the value
.Er ENOMEM .
Functions
.Fn memstat_mtl_first ,
.Fn memstat_mtl_next ,
and
.Fn memstat_mtl_find
will return
.Dv NULL
when there is no entry or match in the list; however, this is not considered
a failure mode and no error value is available.
.Pp
Functions returning an integer success value will return
0
on success, or
\-1
on failure.
If a failure is returned, the list error access method,
.Fn memstat_mtl_geterror ,
may be used to retrieve the error state.
The string representation of the error may be retrieved using
.Fn memstat_strerror .
Possible error values are:
.Bl -tag -width ".Dv MEMSTAT_ERROR_KVM_SHORTREAD"
.It Dv MEMSTAT_ERROR_UNDEFINED
Undefined error.
Occurs if
.Fn memstat_mtl_geterror
is called on a list before an error associated with the list has occurred.
.It Dv MEMSTAT_ERROR_NOMEMORY
Insufficient memory.
Occurs if library calls to
.Xr malloc 3
fail, or if a system call to retrieve kernel statistics fails with
.Er ENOMEM .
.It Dv MEMSTAT_ERROR_VERSION
Returned if the current version of
.Nm
is unable to interpret the statistics data returned by the kernel due to an
explicit version mismatch, or to differences in data structures that cannot
be reconciled.
.It Dv MEMSTAT_ERROR_PERMISSION
Returned if a statistics source returns
.Va errno
values of
.Er EACCES
or
.Er EPERM .
.It Dv MEMSTAT_ERROR_DATAERROR
Returned if
.Nm
is unable to interpret statistics data returned by the data source, even
though there does not appear to be a version problem.
.It Dv MEMSTAT_ERROR_KVM
Returned if
.Nm
experiences an error while using
.Xr kvm 3
interfaces to query statistics data.
Use
.Xr kvm_geterr 3
to retrieve the error.
.It Dv MEMSTAT_ERROR_KVM_NOSYMBOL
Returned if
.Nm
is unable to read a required symbol from the kernel being operated on.
.It Dv MEMSTAT_ERROR_KVM_SHORTREAD
Returned if
.Nm
attempts to read data from a live memory image or kernel core dump and
insufficient data is returned.
.El
.Pp
Finally, functions returning data from a
.Vt "struct memory_type"
pointer are not permitted to fail, and directly return either a statistic
or pointer to a string.
.Sh EXAMPLES
Create a memory type list, query the
.Xr uma 9
memory allocator for available statistics, and print out the number of
allocations performed by the
.Dv mbuf
zone.
.Bd -literal -offset indent
struct memory_type_list *mtlp;
struct memory_type *mtp;
uint64_t mbuf_count;
mtlp = memstat_mtl_alloc();
if (mtlp == NULL)
err(-1, "memstat_mtl_alloc");
if (memstat_sysctl_uma(mtlp, 0) < 0)
err(-1, "memstat_sysctl_uma");
mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, "mbuf");
if (mtp == NULL)
errx(-1, "memstat_mtl_find: mbuf not found");
mbuf_count = memstat_get_count(mtp);
memstat_mtl_free(mtlp);
printf("mbufs: %llu\en", (unsigned long long)mbuf_count);
.Ed
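.Pp
Similarly, every memory type returned by a query can be visited with the
iterator functions; the following sketch prints the current allocation
count of each
.Xr uma 9
zone:
.Bd -literal -offset indent
struct memory_type_list *mtlp;
struct memory_type *mtp;

mtlp = memstat_mtl_alloc();
if (mtlp == NULL)
	err(-1, "memstat_mtl_alloc");
if (memstat_sysctl_uma(mtlp, 0) < 0)
	err(-1, "memstat_sysctl_uma");
for (mtp = memstat_mtl_first(mtlp); mtp != NULL;
    mtp = memstat_mtl_next(mtp))
	printf("%s: %ju\en", memstat_get_name(mtp),
	    (uintmax_t)memstat_get_count(mtp));
memstat_mtl_free(mtlp);
.Ed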
.Sh SEE ALSO
.Xr malloc 9 ,
.Xr uma 9
.Sh HISTORY
The
.Nm
library appeared in
.Fx 6.0 .
.Sh AUTHORS
The kernel memory allocator changes necessary to support a general purpose
monitoring library, along with the library, were written by
.An Robert Watson Aq Mt rwatson@FreeBSD.org .
.Sh BUGS
There are memory allocators in the kernel, such as the VM page allocator
and
.Nm sf_buf
allocator, which are not currently supported by
.Nm .
.Pp
Once a memory type is present on a memory type list, it will not be removed
even if the kernel no longer presents information on the type via its
monitoring interfaces.
In order to flush removed memory types, it is necessary to free the entire
list and allocate a new one.

tools/libmemstat/memstat.c Normal file

@@ -0,0 +1,444 @@
/*-
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/param.h>
#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef FSTACK
#include <stdint.h>
#endif
#include "memstat.h"
#include "memstat_internal.h"
const char *
memstat_strerror(int error)
{
switch (error) {
case MEMSTAT_ERROR_NOMEMORY:
return ("Cannot allocate memory");
case MEMSTAT_ERROR_VERSION:
return ("Version mismatch");
case MEMSTAT_ERROR_PERMISSION:
return ("Permission denied");
case MEMSTAT_ERROR_DATAERROR:
return ("Data format error");
case MEMSTAT_ERROR_KVM:
return ("KVM error");
case MEMSTAT_ERROR_KVM_NOSYMBOL:
return ("KVM unable to find symbol");
case MEMSTAT_ERROR_KVM_SHORTREAD:
return ("KVM short read");
case MEMSTAT_ERROR_UNDEFINED:
default:
return ("Unknown error");
}
}
struct memory_type_list *
memstat_mtl_alloc(void)
{
struct memory_type_list *mtlp;
mtlp = malloc(sizeof(*mtlp));
if (mtlp == NULL)
return (NULL);
LIST_INIT(&mtlp->mtl_list);
mtlp->mtl_error = MEMSTAT_ERROR_UNDEFINED;
return (mtlp);
}
struct memory_type *
memstat_mtl_first(struct memory_type_list *list)
{
return (LIST_FIRST(&list->mtl_list));
}
struct memory_type *
memstat_mtl_next(struct memory_type *mtp)
{
return (LIST_NEXT(mtp, mt_list));
}
void
_memstat_mtl_empty(struct memory_type_list *list)
{
struct memory_type *mtp;
while ((mtp = LIST_FIRST(&list->mtl_list))) {
free(mtp->mt_percpu_alloc);
free(mtp->mt_percpu_cache);
LIST_REMOVE(mtp, mt_list);
free(mtp);
}
}
void
memstat_mtl_free(struct memory_type_list *list)
{
_memstat_mtl_empty(list);
free(list);
}
int
memstat_mtl_geterror(struct memory_type_list *list)
{
return (list->mtl_error);
}
/*
* Look for an existing memory_type entry in a memory_type list, based on the
* allocator and name of the type. If not found, return NULL. No errno or
* memstat error.
*/
struct memory_type *
memstat_mtl_find(struct memory_type_list *list, int allocator,
const char *name)
{
struct memory_type *mtp;
LIST_FOREACH(mtp, &list->mtl_list, mt_list) {
if ((mtp->mt_allocator == allocator ||
allocator == ALLOCATOR_ANY) &&
strcmp(mtp->mt_name, name) == 0)
return (mtp);
}
return (NULL);
}
/*
* Allocate a new memory_type with the specified allocator type and name,
* then insert into the list. The structure will be zero'd.
*
* libmemstat(3) internal function.
*/
struct memory_type *
_memstat_mt_allocate(struct memory_type_list *list, int allocator,
const char *name, int maxcpus)
{
struct memory_type *mtp;
mtp = malloc(sizeof(*mtp));
if (mtp == NULL)
return (NULL);
bzero(mtp, sizeof(*mtp));
mtp->mt_allocator = allocator;
mtp->mt_percpu_alloc = malloc(sizeof(struct mt_percpu_alloc_s) *
maxcpus);
mtp->mt_percpu_cache = malloc(sizeof(struct mt_percpu_cache_s) *
maxcpus);
strlcpy(mtp->mt_name, name, MEMTYPE_MAXNAME);
LIST_INSERT_HEAD(&list->mtl_list, mtp, mt_list);
return (mtp);
}
/*
* Reset any libmemstat(3)-owned statistics in a memory_type record so that
* it can be reused without incremental addition problems. Caller-owned
* memory is left "as-is", and must be updated by the caller if desired.
*
* libmemstat(3) internal function.
*/
void
_memstat_mt_reset_stats(struct memory_type *mtp, int maxcpus)
{
int i;
mtp->mt_countlimit = 0;
mtp->mt_byteslimit = 0;
mtp->mt_sizemask = 0;
mtp->mt_size = 0;
mtp->mt_memalloced = 0;
mtp->mt_memfreed = 0;
mtp->mt_numallocs = 0;
mtp->mt_numfrees = 0;
mtp->mt_bytes = 0;
mtp->mt_count = 0;
mtp->mt_free = 0;
mtp->mt_failures = 0;
mtp->mt_sleeps = 0;
mtp->mt_zonefree = 0;
mtp->mt_kegfree = 0;
for (i = 0; i < maxcpus; i++) {
mtp->mt_percpu_alloc[i].mtp_memalloced = 0;
mtp->mt_percpu_alloc[i].mtp_memfreed = 0;
mtp->mt_percpu_alloc[i].mtp_numallocs = 0;
mtp->mt_percpu_alloc[i].mtp_numfrees = 0;
mtp->mt_percpu_alloc[i].mtp_sizemask = 0;
mtp->mt_percpu_cache[i].mtp_free = 0;
}
}
/*
* Accessor methods for struct memory_type. Avoids encoding the structure
* ABI into the application.
*/
const char *
memstat_get_name(const struct memory_type *mtp)
{
return (mtp->mt_name);
}
int
memstat_get_allocator(const struct memory_type *mtp)
{
return (mtp->mt_allocator);
}
uint64_t
memstat_get_countlimit(const struct memory_type *mtp)
{
return (mtp->mt_countlimit);
}
uint64_t
memstat_get_byteslimit(const struct memory_type *mtp)
{
return (mtp->mt_byteslimit);
}
uint64_t
memstat_get_sizemask(const struct memory_type *mtp)
{
return (mtp->mt_sizemask);
}
uint64_t
memstat_get_size(const struct memory_type *mtp)
{
return (mtp->mt_size);
}
uint64_t
memstat_get_rsize(const struct memory_type *mtp)
{
return (mtp->mt_rsize);
}
uint64_t
memstat_get_memalloced(const struct memory_type *mtp)
{
return (mtp->mt_memalloced);
}
uint64_t
memstat_get_memfreed(const struct memory_type *mtp)
{
return (mtp->mt_memfreed);
}
uint64_t
memstat_get_numallocs(const struct memory_type *mtp)
{
return (mtp->mt_numallocs);
}
uint64_t
memstat_get_numfrees(const struct memory_type *mtp)
{
return (mtp->mt_numfrees);
}
uint64_t
memstat_get_bytes(const struct memory_type *mtp)
{
return (mtp->mt_bytes);
}
uint64_t
memstat_get_count(const struct memory_type *mtp)
{
return (mtp->mt_count);
}
uint64_t
memstat_get_free(const struct memory_type *mtp)
{
return (mtp->mt_free);
}
uint64_t
memstat_get_failures(const struct memory_type *mtp)
{
return (mtp->mt_failures);
}
uint64_t
memstat_get_sleeps(const struct memory_type *mtp)
{
return (mtp->mt_sleeps);
}
void *
memstat_get_caller_pointer(const struct memory_type *mtp, int index)
{
return (mtp->mt_caller_pointer[index]);
}
void
memstat_set_caller_pointer(struct memory_type *mtp, int index, void *value)
{
mtp->mt_caller_pointer[index] = value;
}
uint64_t
memstat_get_caller_uint64(const struct memory_type *mtp, int index)
{
return (mtp->mt_caller_uint64[index]);
}
void
memstat_set_caller_uint64(struct memory_type *mtp, int index, uint64_t value)
{
mtp->mt_caller_uint64[index] = value;
}
uint64_t
memstat_get_zonefree(const struct memory_type *mtp)
{
return (mtp->mt_zonefree);
}
uint64_t
memstat_get_kegfree(const struct memory_type *mtp)
{
return (mtp->mt_kegfree);
}
uint64_t
memstat_get_percpu_memalloced(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_alloc[cpu].mtp_memalloced);
}
uint64_t
memstat_get_percpu_memfreed(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_alloc[cpu].mtp_memfreed);
}
uint64_t
memstat_get_percpu_numallocs(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_alloc[cpu].mtp_numallocs);
}
uint64_t
memstat_get_percpu_numfrees(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_alloc[cpu].mtp_numfrees);
}
uint64_t
memstat_get_percpu_sizemask(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_alloc[cpu].mtp_sizemask);
}
void *
memstat_get_percpu_caller_pointer(const struct memory_type *mtp, int cpu,
int index)
{
return (mtp->mt_percpu_alloc[cpu].mtp_caller_pointer[index]);
}
void
memstat_set_percpu_caller_pointer(struct memory_type *mtp, int cpu,
int index, void *value)
{
mtp->mt_percpu_alloc[cpu].mtp_caller_pointer[index] = value;
}
uint64_t
memstat_get_percpu_caller_uint64(const struct memory_type *mtp, int cpu,
int index)
{
return (mtp->mt_percpu_alloc[cpu].mtp_caller_uint64[index]);
}
void
memstat_set_percpu_caller_uint64(struct memory_type *mtp, int cpu, int index,
uint64_t value)
{
mtp->mt_percpu_alloc[cpu].mtp_caller_uint64[index] = value;
}
uint64_t
memstat_get_percpu_free(const struct memory_type *mtp, int cpu)
{
return (mtp->mt_percpu_cache[cpu].mtp_free);
}

tools/libmemstat/memstat.h Normal file
View File

@@ -0,0 +1,169 @@
/*-
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MEMSTAT_H_
#define _MEMSTAT_H_
/*
* Amount of caller data to maintain for each caller data slot. Applications
* must not request more than this number of caller save data, or risk
* corrupting internal libmemstat(3) data structures. A compile time check
* in the application is probably appropriate.
*/
#define MEMSTAT_MAXCALLER 16
/*
* libmemstat(3) is able to extract memory data from different allocators;
* when it does so, it tags which allocator it got the data from so that
* consumers can determine which fields are usable, as data returned varies
* some.
*/
#define ALLOCATOR_UNKNOWN 0
#define ALLOCATOR_MALLOC 1
#define ALLOCATOR_UMA 2
#define ALLOCATOR_ANY 255
/*
* Library maximum type name. Should be max(set of name maximums over
* various allocators).
*/
#define MEMTYPE_MAXNAME 32
/*
* Library error conditions, mostly from the underlying data sources. On
* failure, functions typically return (-1) or (NULL); on success, (0) or a
* valid data pointer. The error from the last operation is stored in
* struct memory_type_list, and accessed via memstat_mtl_geterror(list).
*/
#define MEMSTAT_ERROR_UNDEFINED 0 /* Initialization value. */
#define MEMSTAT_ERROR_NOMEMORY 1 /* Out of memory. */
#define MEMSTAT_ERROR_VERSION 2 /* Unsupported version. */
#define MEMSTAT_ERROR_PERMISSION 3 /* Permission denied. */
#define MEMSTAT_ERROR_DATAERROR 5 /* Error in stat data. */
#define MEMSTAT_ERROR_KVM 6 /* See kvm_geterr() for err. */
#define MEMSTAT_ERROR_KVM_NOSYMBOL 7 /* Symbol not available. */
#define MEMSTAT_ERROR_KVM_SHORTREAD 8 /* Short kvm_read return. */
/*
* Forward declare struct memory_type, which holds per-type properties and
* statistics. This is an opaque type, to be frobbed only from within the
* library, in order to avoid building ABI assumptions into the application.
* Accessor methods should be used to get and sometimes set the fields from
* consumers of the library.
*/
struct memory_type;
/*
* struct memory_type_list is the head of a list of memory types and
* statistics.
*/
struct memory_type_list;
__BEGIN_DECLS
/*
* Functions that operate without memory type or memory type list context.
*/
const char *memstat_strerror(int error);
/*
* Functions for managing memory type and statistics data.
*/
struct memory_type_list *memstat_mtl_alloc(void);
struct memory_type *memstat_mtl_first(struct memory_type_list *list);
struct memory_type *memstat_mtl_next(struct memory_type *mtp);
struct memory_type *memstat_mtl_find(struct memory_type_list *list,
int allocator, const char *name);
void memstat_mtl_free(struct memory_type_list *list);
int memstat_mtl_geterror(struct memory_type_list *list);
/*
* Functions to retrieve data from a live kernel using sysctl.
*/
int memstat_sysctl_all(struct memory_type_list *list, int flags);
int memstat_sysctl_malloc(struct memory_type_list *list, int flags);
int memstat_sysctl_uma(struct memory_type_list *list, int flags);
/*
* Functions to retrieve data from a kernel core (or /dev/kmem).
*/
int memstat_kvm_all(struct memory_type_list *list, void *kvm_handle);
int memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle);
int memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle);
/*
* Accessor methods for struct memory_type.
*/
const char *memstat_get_name(const struct memory_type *mtp);
int memstat_get_allocator(const struct memory_type *mtp);
uint64_t memstat_get_countlimit(const struct memory_type *mtp);
uint64_t memstat_get_byteslimit(const struct memory_type *mtp);
uint64_t memstat_get_sizemask(const struct memory_type *mtp);
uint64_t memstat_get_size(const struct memory_type *mtp);
uint64_t memstat_get_rsize(const struct memory_type *mtp);
uint64_t memstat_get_memalloced(const struct memory_type *mtp);
uint64_t memstat_get_memfreed(const struct memory_type *mtp);
uint64_t memstat_get_numallocs(const struct memory_type *mtp);
uint64_t memstat_get_numfrees(const struct memory_type *mtp);
uint64_t memstat_get_bytes(const struct memory_type *mtp);
uint64_t memstat_get_count(const struct memory_type *mtp);
uint64_t memstat_get_free(const struct memory_type *mtp);
uint64_t memstat_get_failures(const struct memory_type *mtp);
uint64_t memstat_get_sleeps(const struct memory_type *mtp);
void *memstat_get_caller_pointer(const struct memory_type *mtp,
int index);
void memstat_set_caller_pointer(struct memory_type *mtp,
int index, void *value);
uint64_t memstat_get_caller_uint64(const struct memory_type *mtp,
int index);
void memstat_set_caller_uint64(struct memory_type *mtp, int index,
uint64_t value);
uint64_t memstat_get_zonefree(const struct memory_type *mtp);
uint64_t memstat_get_kegfree(const struct memory_type *mtp);
uint64_t memstat_get_percpu_memalloced(const struct memory_type *mtp,
int cpu);
uint64_t memstat_get_percpu_memfreed(const struct memory_type *mtp,
int cpu);
uint64_t memstat_get_percpu_numallocs(const struct memory_type *mtp,
int cpu);
uint64_t memstat_get_percpu_numfrees(const struct memory_type *mtp,
int cpu);
uint64_t memstat_get_percpu_sizemask(const struct memory_type *mtp,
int cpu);
void *memstat_get_percpu_caller_pointer(
const struct memory_type *mtp, int cpu, int index);
void memstat_set_percpu_caller_pointer(struct memory_type *mtp,
int cpu, int index, void *value);
uint64_t memstat_get_percpu_caller_uint64(
const struct memory_type *mtp, int cpu, int index);
void memstat_set_percpu_caller_uint64(struct memory_type *mtp,
int cpu, int index, uint64_t value);
uint64_t memstat_get_percpu_free(const struct memory_type *mtp,
int cpu);
__END_DECLS
#endif /* !_MEMSTAT_H_ */

View File

@@ -0,0 +1,65 @@
/*-
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/types.h>
#include <sys/queue.h>
#ifdef FSTACK
#include <stdint.h>
#endif
#include "memstat.h"
/*
* Query all available memory allocator sources. Currently this consists of
* malloc(9) and UMA(9).
*/
int
memstat_sysctl_all(struct memory_type_list *mtlp, int flags)
{
if (memstat_sysctl_malloc(mtlp, flags) < 0)
return (-1);
if (memstat_sysctl_uma(mtlp, flags) < 0)
return (-1);
return (0);
}
#ifndef FSTACK
int
memstat_kvm_all(struct memory_type_list *mtlp, void *kvm_handle)
{
if (memstat_kvm_malloc(mtlp, kvm_handle) < 0)
return (-1);
if (memstat_kvm_uma(mtlp, kvm_handle) < 0)
return (-1);
return (0);
}
#endif

View File

@@ -0,0 +1,127 @@
/*-
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MEMSTAT_INTERNAL_H_
#define _MEMSTAT_INTERNAL_H_
/*
* memstat maintains its own internal notion of statistics on each memory
* type, common across UMA and kernel malloc. Some fields are straight from
* the allocator statistics, others are derived when extracted from the
* kernel. A struct memory_type will describe each type supported by an
* allocator. memory_type structures can be chained into lists.
*/
struct memory_type {
/*
* Static properties of type.
*/
int mt_allocator; /* malloc(9), uma(9), etc. */
char mt_name[MEMTYPE_MAXNAME]; /* name of memory type. */
/*
* (Relatively) static zone settings, that don't uniquely identify
* the zone, but also don't change much.
*/
uint64_t mt_countlimit; /* 0, or maximum allocations. */
uint64_t mt_byteslimit; /* 0, or maximum bytes. */
uint64_t mt_sizemask; /* malloc: allocated size bitmask. */
uint64_t mt_size; /* uma: size of objects. */
uint64_t mt_rsize; /* uma: real size of objects. */
/*
* Zone or type information that includes all caches and any central
* zone state. Depending on the allocator, this may be synthesized
* from several sources, or directly measured.
*/
uint64_t mt_memalloced; /* Bytes allocated over life time. */
uint64_t mt_memfreed; /* Bytes freed over life time. */
uint64_t mt_numallocs; /* Allocations over life time. */
uint64_t mt_numfrees; /* Frees over life time. */
uint64_t mt_bytes; /* Bytes currently allocated. */
uint64_t mt_count; /* Number of current allocations. */
uint64_t mt_free; /* Number of cached free items. */
uint64_t mt_failures; /* Number of allocation failures. */
uint64_t mt_sleeps; /* Number of allocation sleeps. */
/*
* Caller-owned memory.
*/
void *mt_caller_pointer[MEMSTAT_MAXCALLER]; /* Pointers. */
uint64_t mt_caller_uint64[MEMSTAT_MAXCALLER]; /* Integers. */
/*
* For allocators making use of per-CPU caches, we also provide raw
* statistics from the central allocator and each per-CPU cache,
* which (combined) sometimes make up the above general statistics.
*
* First, central zone/type state, all numbers excluding any items
* cached in per-CPU caches.
*
* XXXRW: Might be desirable to separately expose allocation stats
* from zone, which should (combined with per-cpu) add up to the
* global stats above.
*/
uint64_t mt_zonefree; /* Free items in zone. */
uint64_t mt_kegfree; /* Free items in keg. */
/*
* Per-CPU measurements fall into two categories: per-CPU allocation,
* and per-CPU cache state.
*/
struct mt_percpu_alloc_s {
uint64_t mtp_memalloced;/* Per-CPU mt_memalloced. */
uint64_t mtp_memfreed; /* Per-CPU mt_memfreed. */
uint64_t mtp_numallocs; /* Per-CPU mt_numallocs. */
uint64_t mtp_numfrees; /* Per-CPU mt_numfrees. */
uint64_t mtp_sizemask; /* Per-CPU mt_sizemask. */
void *mtp_caller_pointer[MEMSTAT_MAXCALLER];
uint64_t mtp_caller_uint64[MEMSTAT_MAXCALLER];
} *mt_percpu_alloc;
struct mt_percpu_cache_s {
uint64_t mtp_free; /* Per-CPU cache free items. */
} *mt_percpu_cache;
LIST_ENTRY(memory_type) mt_list; /* List of types. */
};
/*
* Description of struct memory_type_list is in memstat.h.
*/
struct memory_type_list {
LIST_HEAD(, memory_type) mtl_list;
int mtl_error;
};
void _memstat_mtl_empty(struct memory_type_list *list);
struct memory_type *_memstat_mt_allocate(struct memory_type_list *list,
int allocator, const char *name, int maxcpus);
void _memstat_mt_reset_stats(struct memory_type *mtp,
int maxcpus);
#endif /* !_MEMSTAT_INTERNAL_H_ */

View File

@@ -0,0 +1,413 @@
/*-
* Copyright (c) 2005 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifdef FSTACK
#include <stdint.h>
#endif
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#ifndef FSTACK
#include <kvm.h>
#endif
#include <nlist.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "memstat.h"
#include "memstat_internal.h"
#ifndef FSTACK
static struct nlist namelist[] = {
#define X_KMEMSTATISTICS 0
{ .n_name = "_kmemstatistics" },
#define X_MP_MAXCPUS 1
{ .n_name = "_mp_maxcpus" },
{ .n_name = "" },
};
#endif
/*
* Extract malloc(9) statistics from the running kernel, and store all memory
* type information in the passed list. For each type, check the list for an
* existing entry with the right name/allocator -- if present, update that
* entry. Otherwise, add a new entry. On error, the entire list will be
* cleared, as entries will be in an inconsistent state.
*
* To reduce the level of work for a list that starts empty, we keep around a
* hint as to whether it was empty when we began, so we can avoid searching
* the list for entries to update. Updates are O(n^2) due to searching for
* each entry before adding it.
*/
int
memstat_sysctl_malloc(struct memory_type_list *list, int flags)
{
struct malloc_type_stream_header *mtshp;
struct malloc_type_header *mthp;
struct malloc_type_stats *mtsp;
struct memory_type *mtp;
int count, hint_dontsearch, i, j, maxcpus;
char *buffer, *p;
size_t size;
hint_dontsearch = LIST_EMPTY(&list->mtl_list);
/*
* Query the number of CPUs, number of malloc types so that we can
* guess an initial buffer size. We loop until we succeed or really
* fail. Note that the value of maxcpus we query using sysctl is not
* the version we use when processing the real data -- that is read
* from the header.
*/
retry:
size = sizeof(maxcpus);
if (sysctlbyname("kern.smp.maxcpus", &maxcpus, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
if (size != sizeof(maxcpus)) {
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
size = sizeof(count);
if (sysctlbyname("kern.malloc_count", &count, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_VERSION;
return (-1);
}
if (size != sizeof(count)) {
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
size = sizeof(*mthp) + count * (sizeof(*mthp) + sizeof(*mtsp) *
maxcpus);
buffer = malloc(size);
if (buffer == NULL) {
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
if (sysctlbyname("kern.malloc_stats", buffer, &size, NULL, 0) < 0) {
/*
* XXXRW: ENOMEM is an ambiguous return, we should bound the
* number of loops, perhaps.
*/
if (errno == ENOMEM) {
free(buffer);
goto retry;
}
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
if (size == 0) {
free(buffer);
return (0);
}
if (size < sizeof(*mtshp)) {
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
p = buffer;
mtshp = (struct malloc_type_stream_header *)p;
p += sizeof(*mtshp);
if (mtshp->mtsh_version != MALLOC_TYPE_STREAM_VERSION) {
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
/*
* For the remainder of this function, we are quite trusting about
* the layout of structures and sizes, since we've determined we have
* a matching version and acceptable CPU count.
*/
maxcpus = mtshp->mtsh_maxcpus;
count = mtshp->mtsh_count;
for (i = 0; i < count; i++) {
mthp = (struct malloc_type_header *)p;
p += sizeof(*mthp);
if (hint_dontsearch == 0) {
mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC,
mthp->mth_name);
} else
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
mthp->mth_name, maxcpus);
if (mtp == NULL) {
_memstat_mtl_empty(list);
free(buffer);
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
/*
* Reset the statistics on a current node.
*/
_memstat_mt_reset_stats(mtp, maxcpus);
for (j = 0; j < maxcpus; j++) {
mtsp = (struct malloc_type_stats *)p;
p += sizeof(*mtsp);
/*
* Summarize raw statistics across CPUs into coalesced
* statistics.
*/
mtp->mt_memalloced += mtsp->mts_memalloced;
mtp->mt_memfreed += mtsp->mts_memfreed;
mtp->mt_numallocs += mtsp->mts_numallocs;
mtp->mt_numfrees += mtsp->mts_numfrees;
mtp->mt_sizemask |= mtsp->mts_size;
/*
* Copies of per-CPU statistics.
*/
mtp->mt_percpu_alloc[j].mtp_memalloced =
mtsp->mts_memalloced;
mtp->mt_percpu_alloc[j].mtp_memfreed =
mtsp->mts_memfreed;
mtp->mt_percpu_alloc[j].mtp_numallocs =
mtsp->mts_numallocs;
mtp->mt_percpu_alloc[j].mtp_numfrees =
mtsp->mts_numfrees;
mtp->mt_percpu_alloc[j].mtp_sizemask =
mtsp->mts_size;
}
/*
* Derived cross-CPU statistics.
*/
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
}
free(buffer);
return (0);
}
#ifndef FSTACK
static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
size_t offset)
{
ssize_t ret;
ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
size);
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != size)
return (MEMSTAT_ERROR_KVM_SHORTREAD);
return (0);
}
static int
kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
{
ssize_t ret;
int i;
for (i = 0; i < buflen; i++) {
ret = kvm_read(kvm, __DECONST(unsigned long, kvm_pointer) +
i, &(buffer[i]), sizeof(char));
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != sizeof(char))
return (MEMSTAT_ERROR_KVM_SHORTREAD);
if (buffer[i] == '\0')
return (0);
}
/* Truncate. */
buffer[i-1] = '\0';
return (0);
}
static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
size_t offset)
{
ssize_t ret;
ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != size)
return (MEMSTAT_ERROR_KVM_SHORTREAD);
return (0);
}
int
memstat_kvm_malloc(struct memory_type_list *list, void *kvm_handle)
{
struct memory_type *mtp;
void *kmemstatistics;
int hint_dontsearch, j, mp_maxcpus, ret;
char name[MEMTYPE_MAXNAME];
struct malloc_type_stats *mts, *mtsp;
struct malloc_type_internal *mtip;
struct malloc_type type, *typep;
kvm_t *kvm;
kvm = (kvm_t *)kvm_handle;
hint_dontsearch = LIST_EMPTY(&list->mtl_list);
if (kvm_nlist(kvm, namelist) != 0) {
list->mtl_error = MEMSTAT_ERROR_KVM;
return (-1);
}
if (namelist[X_KMEMSTATISTICS].n_type == 0 ||
namelist[X_KMEMSTATISTICS].n_value == 0) {
list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
return (-1);
}
ret = kread_symbol(kvm, X_MP_MAXCPUS, &mp_maxcpus,
sizeof(mp_maxcpus), 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
}
ret = kread_symbol(kvm, X_KMEMSTATISTICS, &kmemstatistics,
sizeof(kmemstatistics), 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
}
mts = malloc(sizeof(struct malloc_type_stats) * mp_maxcpus);
if (mts == NULL) {
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
for (typep = kmemstatistics; typep != NULL; typep = type.ks_next) {
ret = kread(kvm, typep, &type, sizeof(type), 0);
if (ret != 0) {
_memstat_mtl_empty(list);
free(mts);
list->mtl_error = ret;
return (-1);
}
ret = kread_string(kvm, (void *)type.ks_shortdesc, name,
MEMTYPE_MAXNAME);
if (ret != 0) {
_memstat_mtl_empty(list);
free(mts);
list->mtl_error = ret;
return (-1);
}
/*
* Since our compile-time value for MAXCPU may differ from the
* kernel's, we populate our own array.
*/
mtip = type.ks_handle;
ret = kread(kvm, mtip->mti_stats, mts, mp_maxcpus *
sizeof(struct malloc_type_stats), 0);
if (ret != 0) {
_memstat_mtl_empty(list);
free(mts);
list->mtl_error = ret;
return (-1);
}
if (hint_dontsearch == 0) {
mtp = memstat_mtl_find(list, ALLOCATOR_MALLOC, name);
} else
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_MALLOC,
name, mp_maxcpus);
if (mtp == NULL) {
_memstat_mtl_empty(list);
free(mts);
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
/*
* This logic is replicated from kern_malloc.c, and should
* be kept in sync.
*/
_memstat_mt_reset_stats(mtp, mp_maxcpus);
for (j = 0; j < mp_maxcpus; j++) {
mtsp = &mts[j];
mtp->mt_memalloced += mtsp->mts_memalloced;
mtp->mt_memfreed += mtsp->mts_memfreed;
mtp->mt_numallocs += mtsp->mts_numallocs;
mtp->mt_numfrees += mtsp->mts_numfrees;
mtp->mt_sizemask |= mtsp->mts_size;
mtp->mt_percpu_alloc[j].mtp_memalloced =
mtsp->mts_memalloced;
mtp->mt_percpu_alloc[j].mtp_memfreed =
mtsp->mts_memfreed;
mtp->mt_percpu_alloc[j].mtp_numallocs =
mtsp->mts_numallocs;
mtp->mt_percpu_alloc[j].mtp_numfrees =
mtsp->mts_numfrees;
mtp->mt_percpu_alloc[j].mtp_sizemask =
mtsp->mts_size;
}
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
}
return (0);
}
#endif

View File

@@ -0,0 +1,475 @@
/*-
* Copyright (c) 2005-2006 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#ifndef FSTACK
#include <vm/vm.h>
#include <vm/vm_page.h>
#endif
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <err.h>
#include <errno.h>
#ifndef FSTACK
#include <kvm.h>
#endif
#include <nlist.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "memstat.h"
#include "memstat_internal.h"
#ifndef FSTACK
static struct nlist namelist[] = {
#define X_UMA_KEGS 0
{ .n_name = "_uma_kegs" },
#define X_MP_MAXID 1
{ .n_name = "_mp_maxid" },
#define X_ALL_CPUS 2
{ .n_name = "_all_cpus" },
{ .n_name = "" },
};
#endif
/*
* Extract uma(9) statistics from the running kernel, and store all memory
* type information in the passed list. For each type, check the list for an
* existing entry with the right name/allocator -- if present, update that
* entry. Otherwise, add a new entry. On error, the entire list will be
* cleared, as entries will be in an inconsistent state.
*
* To reduce the level of work for a list that starts empty, we keep around a
* hint as to whether it was empty when we began, so we can avoid searching
* the list for entries to update. Updates are O(n^2) due to searching for
* each entry before adding it.
*/
int
memstat_sysctl_uma(struct memory_type_list *list, int flags)
{
struct uma_stream_header *ushp;
struct uma_type_header *uthp;
struct uma_percpu_stat *upsp;
struct memory_type *mtp;
int count, hint_dontsearch, i, j, maxcpus, maxid;
char *buffer, *p;
size_t size;
hint_dontsearch = LIST_EMPTY(&list->mtl_list);
/*
* Query the number of CPUs, number of malloc types so that we can
* guess an initial buffer size. We loop until we succeed or really
* fail. Note that the value of maxcpus we query using sysctl is not
* the version we use when processing the real data -- that is read
* from the header.
*/
retry:
size = sizeof(maxid);
if (sysctlbyname("kern.smp.maxid", &maxid, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
if (size != sizeof(maxid)) {
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
size = sizeof(count);
if (sysctlbyname("vm.zone_count", &count, &size, NULL, 0) < 0) {
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_VERSION;
return (-1);
}
if (size != sizeof(count)) {
list->mtl_error = MEMSTAT_ERROR_DATAERROR;
return (-1);
}
size = sizeof(*uthp) + count * (sizeof(*uthp) + sizeof(*upsp) *
(maxid + 1));
buffer = malloc(size);
if (buffer == NULL) {
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
if (sysctlbyname("vm.zone_stats", buffer, &size, NULL, 0) < 0) {
/*
* XXXRW: ENOMEM is an ambiguous return, we should bound the
* number of loops, perhaps.
*/
if (errno == ENOMEM) {
free(buffer);
goto retry;
}
if (errno == EACCES || errno == EPERM)
list->mtl_error = MEMSTAT_ERROR_PERMISSION;
else
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
if (size == 0) {
free(buffer);
return (0);
}
if (size < sizeof(*ushp)) {
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
p = buffer;
ushp = (struct uma_stream_header *)p;
p += sizeof(*ushp);
if (ushp->ush_version != UMA_STREAM_VERSION) {
list->mtl_error = MEMSTAT_ERROR_VERSION;
free(buffer);
return (-1);
}
/*
* For the remainder of this function, we are quite trusting about
* the layout of structures and sizes, since we've determined we have
* a matching version and acceptable CPU count.
*/
maxcpus = ushp->ush_maxcpus;
count = ushp->ush_count;
for (i = 0; i < count; i++) {
uthp = (struct uma_type_header *)p;
p += sizeof(*uthp);
if (hint_dontsearch == 0) {
mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
uthp->uth_name);
} else
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
uthp->uth_name, maxid + 1);
if (mtp == NULL) {
_memstat_mtl_empty(list);
free(buffer);
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
/*
* Reset the statistics on a current node.
*/
_memstat_mt_reset_stats(mtp, maxid + 1);
mtp->mt_numallocs = uthp->uth_allocs;
mtp->mt_numfrees = uthp->uth_frees;
mtp->mt_failures = uthp->uth_fails;
mtp->mt_sleeps = uthp->uth_sleeps;
for (j = 0; j < maxcpus; j++) {
upsp = (struct uma_percpu_stat *)p;
p += sizeof(*upsp);
mtp->mt_percpu_cache[j].mtp_free =
upsp->ups_cache_free;
mtp->mt_free += upsp->ups_cache_free;
mtp->mt_numallocs += upsp->ups_allocs;
mtp->mt_numfrees += upsp->ups_frees;
}
mtp->mt_size = uthp->uth_size;
mtp->mt_rsize = uthp->uth_rsize;
mtp->mt_memalloced = mtp->mt_numallocs * uthp->uth_size;
mtp->mt_memfreed = mtp->mt_numfrees * uthp->uth_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
mtp->mt_countlimit = uthp->uth_limit;
mtp->mt_byteslimit = uthp->uth_limit * uthp->uth_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
mtp->mt_zonefree = uthp->uth_zone_free;
/*
* UMA secondary zones share a keg with the primary zone. To
* avoid double-reporting of free items, report keg free
* items only in the primary zone.
*/
if (!(uthp->uth_zone_flags & UTH_ZONE_SECONDARY)) {
mtp->mt_kegfree = uthp->uth_keg_free;
mtp->mt_free += mtp->mt_kegfree;
}
mtp->mt_free += mtp->mt_zonefree;
}
free(buffer);
return (0);
}
#ifndef FSTACK
static int
kread(kvm_t *kvm, void *kvm_pointer, void *address, size_t size,
size_t offset)
{
ssize_t ret;
ret = kvm_read(kvm, (unsigned long)kvm_pointer + offset, address,
size);
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != size)
return (MEMSTAT_ERROR_KVM_SHORTREAD);
return (0);
}
static int
kread_string(kvm_t *kvm, const void *kvm_pointer, char *buffer, int buflen)
{
ssize_t ret;
int i;
for (i = 0; i < buflen; i++) {
ret = kvm_read(kvm, (unsigned long)kvm_pointer + i,
&(buffer[i]), sizeof(char));
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != sizeof(char))
return (MEMSTAT_ERROR_KVM_SHORTREAD);
if (buffer[i] == '\0')
return (0);
}
/* Truncate. */
buffer[i-1] = '\0';
return (0);
}
static int
kread_symbol(kvm_t *kvm, int index, void *address, size_t size,
size_t offset)
{
ssize_t ret;
ret = kvm_read(kvm, namelist[index].n_value + offset, address, size);
if (ret < 0)
return (MEMSTAT_ERROR_KVM);
if ((size_t)ret != size)
return (MEMSTAT_ERROR_KVM_SHORTREAD);
return (0);
}
/*
* memstat_kvm_uma() is similar to memstat_sysctl_uma(), only it extracts
* UMA(9) statistics from a kernel core/memory file.
*/
int
memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
{
LIST_HEAD(, uma_keg) uma_kegs;
struct memory_type *mtp;
struct uma_bucket *ubp, ub;
struct uma_cache *ucp, *ucp_array;
struct uma_zone *uzp, uz;
struct uma_keg *kzp, kz;
int hint_dontsearch, i, mp_maxid, ret;
char name[MEMTYPE_MAXNAME];
cpuset_t all_cpus;
long cpusetsize;
kvm_t *kvm;
kvm = (kvm_t *)kvm_handle;
hint_dontsearch = LIST_EMPTY(&list->mtl_list);
if (kvm_nlist(kvm, namelist) != 0) {
list->mtl_error = MEMSTAT_ERROR_KVM;
return (-1);
}
if (namelist[X_UMA_KEGS].n_type == 0 ||
namelist[X_UMA_KEGS].n_value == 0) {
list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
return (-1);
}
ret = kread_symbol(kvm, X_MP_MAXID, &mp_maxid, sizeof(mp_maxid), 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
}
ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
}
cpusetsize = sysconf(_SC_CPUSET_SIZE);
if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
return (-1);
}
CPU_ZERO(&all_cpus);
ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
if (ret != 0) {
list->mtl_error = ret;
return (-1);
}
ucp_array = malloc(sizeof(struct uma_cache) * (mp_maxid + 1));
if (ucp_array == NULL) {
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
for (kzp = LIST_FIRST(&uma_kegs); kzp != NULL; kzp =
LIST_NEXT(&kz, uk_link)) {
ret = kread(kvm, kzp, &kz, sizeof(kz), 0);
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
for (uzp = LIST_FIRST(&kz.uk_zones); uzp != NULL; uzp =
LIST_NEXT(&uz, uz_link)) {
ret = kread(kvm, uzp, &uz, sizeof(uz), 0);
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
ret = kread(kvm, uzp, ucp_array,
sizeof(struct uma_cache) * (mp_maxid + 1),
offsetof(struct uma_zone, uz_cpu[0]));
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
ret = kread_string(kvm, uz.uz_name, name,
MEMTYPE_MAXNAME);
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
if (hint_dontsearch == 0) {
mtp = memstat_mtl_find(list, ALLOCATOR_UMA,
name);
} else
mtp = NULL;
if (mtp == NULL)
mtp = _memstat_mt_allocate(list, ALLOCATOR_UMA,
name, mp_maxid + 1);
if (mtp == NULL) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = MEMSTAT_ERROR_NOMEMORY;
return (-1);
}
/*
* Reset the statistics on a current node.
*/
_memstat_mt_reset_stats(mtp, mp_maxid + 1);
mtp->mt_numallocs = uz.uz_allocs;
mtp->mt_numfrees = uz.uz_frees;
mtp->mt_failures = uz.uz_fails;
mtp->mt_sleeps = uz.uz_sleeps;
if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
goto skip_percpu;
for (i = 0; i < mp_maxid + 1; i++) {
if (!CPU_ISSET(i, &all_cpus))
continue;
ucp = &ucp_array[i];
mtp->mt_numallocs += ucp->uc_allocs;
mtp->mt_numfrees += ucp->uc_frees;
if (ucp->uc_allocbucket != NULL) {
ret = kread(kvm, ucp->uc_allocbucket,
&ub, sizeof(ub), 0);
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
mtp->mt_free += ub.ub_cnt;
}
if (ucp->uc_freebucket != NULL) {
ret = kread(kvm, ucp->uc_freebucket,
&ub, sizeof(ub), 0);
if (ret != 0) {
free(ucp_array);
_memstat_mtl_empty(list);
list->mtl_error = ret;
return (-1);
}
mtp->mt_free += ub.ub_cnt;
}
}
skip_percpu:
mtp->mt_size = kz.uk_size;
mtp->mt_rsize = kz.uk_rsize;
mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
if (kz.uk_ppera > 1)
mtp->mt_countlimit = kz.uk_maxpages /
kz.uk_ipers;
else
mtp->mt_countlimit = kz.uk_maxpages *
kz.uk_ipers;
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
for (ubp = LIST_FIRST(&uz.uz_buckets); ubp !=
NULL; ubp = LIST_NEXT(&ub, ub_link)) {
ret = kread(kvm, ubp, &ub, sizeof(ub), 0);
mtp->mt_zonefree += ub.ub_cnt;
}
if (!((kz.uk_flags & UMA_ZONE_SECONDARY) &&
LIST_FIRST(&kz.uk_zones) != uzp)) {
mtp->mt_kegfree = kz.uk_free;
mtp->mt_free += mtp->mt_kegfree;
}
mtp->mt_free += mtp->mt_zonefree;
}
}
free(ucp_array);
return (0);
}
#endif

tools/libutil/Makefile Normal file
View File

@@ -0,0 +1,98 @@
# @(#)Makefile 8.1 (Berkeley) 6/4/93
# $FreeBSD$
PACKAGE=lib${LIB}
SHLIBDIR?= /lib
TOPDIR?=${CURDIR}/../..
include ${TOPDIR}/tools/opts.mk
LIB= util
SHLIB_MAJOR= 9
#SRCS= _secure_path.c auth.c expand_number.c flopen.c fparseln.c gr_util.c \
hexdump.c humanize_number.c kinfo_getfile.c \
kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c \
kinfo_getvmobject.c kld.c \
login_auth.c login_cap.c \
login_class.c login_crypt.c login_ok.c login_times.c login_tty.c \
pidfile.c property.c pty.c pw_util.c quotafile.c realhostname.c \
stub.c trimdomain.c uucplock.c
SRCS= _secure_path.c auth.c expand_number.c flopen.c fparseln.c \
hexdump.c humanize_number.c \
stub.c trimdomain.c
INCS= libutil.h login_cap.h
CFLAGS+= -DLIBC_SCCS
ifneq (${MK_INET6_SUPPORT},"no")
CFLAGS+= -DINET6
endif
CFLAGS+= -I${CURDIR} -I${CURDIR}/../libc/gen/
MAN+= expand_number.3 flopen.3 fparseln.3 hexdump.3 \
humanize_number.3 kinfo_getallproc.3 kinfo_getfile.3 \
kinfo_getproc.3 kinfo_getvmmap.3 kinfo_getvmobject.3 kld.3 \
login_auth.3 login_cap.3 \
login_class.3 login_ok.3 login_times.3 login_tty.3 pidfile.3 \
property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \
_secure_path.3 trimdomain.3 uucplock.3 pw_util.3
MAN+= login.conf.5
MLINKS+= kld.3 kld_isloaded.3 kld.3 kld_load.3
MLINKS+=login_auth.3 auth_cat.3 login_auth.3 auth_checknologin.3
MLINKS+=login_cap.3 login_close.3 login_cap.3 login_getcapbool.3 \
login_cap.3 login_getcaplist.3 login_cap.3 login_getcapnum.3 \
login_cap.3 login_getcapsize.3 login_cap.3 login_getcapstr.3 \
login_cap.3 login_getcaptime.3 login_cap.3 login_getclass.3 \
login_cap.3 login_getclassbyname.3 login_cap.3 login_getpath.3 \
login_cap.3 login_getpwclass.3 login_cap.3 login_getstyle.3 \
login_cap.3 login_getuserclass.3 login_cap.3 login_setcryptfmt.3
MLINKS+=login_class.3 setclasscontext.3 login_class.3 setclassenvironment.3 \
login_class.3 setclassresources.3 login_class.3 setusercontext.3
MLINKS+=login_ok.3 auth_hostok.3 login_ok.3 auth_timeok.3 \
login_ok.3 auth_ttyok.3
MLINKS+=login_times.3 in_lt.3 login_times.3 in_ltm.3 \
login_times.3 in_ltms.3 \
login_times.3 in_lts.3 \
login_times.3 parse_lt.3
MLINKS+=pidfile.3 pidfile_close.3 \
pidfile.3 pidfile_fileno.3 \
pidfile.3 pidfile_open.3 \
pidfile.3 pidfile_remove.3 \
pidfile.3 pidfile_write.3
MLINKS+= property.3 property_find.3 property.3 properties_free.3
MLINKS+= property.3 properties_read.3
MLINKS+= pty.3 forkpty.3 pty.3 openpty.3
MLINKS+=quotafile.3 quota_close.3 \
quotafile.3 quota_fsname.3 \
quotafile.3 quota_open.3 \
quotafile.3 quota_qfname.3 \
quotafile.3 quota_read.3 \
quotafile.3 quota_statfs.3 \
quotafile.3 quota_write_limits.3 \
quotafile.3 quota_write_usage.3
MLINKS+=uucplock.3 uu_lock.3 uucplock.3 uu_lock_txfr.3 \
uucplock.3 uu_lockerr.3 uucplock.3 uu_unlock.3
MLINKS+=pw_util.3 pw_copy.3 \
pw_util.3 pw_dup.3 \
pw_util.3 pw_edit.3 \
pw_util.3 pw_equal.3 \
pw_util.3 pw_fini.3 \
pw_util.3 pw_init.3 \
pw_util.3 pw_make.3 \
pw_util.3 pw_make_v7.3 \
pw_util.3 pw_mkdb.3 \
pw_util.3 pw_lock.3 \
pw_util.3 pw_scan.3 \
pw_util.3 pw_tempname.3 \
pw_util.3 pw_tmp.3
ifneq (${MK_TESTS},"no")
SUBDIR+= tests
endif
include ${TOPDIR}/tools/lib.mk

View File

@@ -0,0 +1,19 @@
# $FreeBSD$
# Autogenerated - do NOT edit!
DIRDEPS = \
gnu/lib/csu \
gnu/lib/libgcc \
include \
include/arpa \
include/xlocale \
lib/${CSU_DIR} \
lib/libc \
lib/libcompiler_rt \
.include <dirdeps.mk>
.if ${DEP_RELDIR} == ${_DEP_RELDIR}
# local dependencies - needed for -jN in clean tree
.endif

View File

@@ -0,0 +1,75 @@
.\" Copyright (c) 1997 David Nugent <davidn@blaze.net.au>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, is permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice immediately at the beginning of the file, without modification,
.\" this list of conditions, and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. This work was done expressly for inclusion into FreeBSD. Other use
.\" is permitted provided this notation is included.
.\" 4. Absolutely no warranty of function or purpose is made by the author
.\" David Nugent.
.\" 5. Modifications may be freely made to this file providing the above
.\" conditions are met.
.\"
.\" $FreeBSD$
.\"
.Dd May 2, 1997
.Dt _SECURE_PATH 3
.Os
.Sh NAME
.Nm _secure_path
.Nd determine if a file appears to be secure
.Sh LIBRARY
.Lb libutil
.Sh SYNOPSIS
.In sys/types.h
.In libutil.h
.Ft int
.Fn _secure_path "const char *path" "uid_t uid" "gid_t gid"
.Sh DESCRIPTION
This function does some basic security checking on a given path.
It is intended to be used by processes running with root privileges
in order to decide whether or not to trust the contents of a given
file.
It uses a method often used to detect system compromise.
.Pp
A file is considered
.Sq secure
if it meets the following conditions:
.Bl -enum
.It
The file exists, and is a regular file (not a symlink, device
special or named pipe, etc.),
.It
Is not world writable.
.It
Is owned by the given uid or uid 0, if uid is not -1,
.It
Is not group writable or it has group ownership by the given
gid, if gid is not -1.
.El
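.Pp
As an illustrative sketch only (the configuration file path and the
root-ownership policy are assumptions for the example, not part of the
interface), a privileged daemon might act on the return value as follows:
.Bd -literal -offset indent
int r;

r = _secure_path("/etc/example.conf", 0, 0);
if (r == -2)
	errx(1, "configuration file is missing");
else if (r == -1)
	errx(1, "configuration file failed the security check");
/* r == 0: the file may be trusted. */
.Ed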
.Sh RETURN VALUES
This function returns zero if the file exists and may be
considered secure, -2 if the file does not exist, and
-1 otherwise to indicate a security failure.
The
.Xr syslog 3
function is used to log any failure of this function, including the
reason, at LOG_ERR priority.
.Sh SEE ALSO
.Xr lstat 2 ,
.Xr syslog 3
.Sh HISTORY
Code from which this function was derived was contributed to the
.Fx
project by Berkeley Software Design, Inc.
.Sh BUGS
The checks carried out are rudimentary and no attempt is made
to eliminate race conditions between use of this function and
access to the file referenced.

View File

@@ -0,0 +1,74 @@
/*-
* Based on code copyright (c) 1995,1997 by
* Berkeley Software Design, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. This work was done expressly for inclusion into FreeBSD. Other use
* is permitted provided this notation is included.
* 4. Absolutely no warranty of function or purpose is made by the authors.
* 5. Modifications may be freely made to this file providing the above
* conditions are met.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <libutil.h>
#include <stddef.h>
#include <syslog.h>
/*
* Check for common security problems on a given path
* It must be:
* 1. A regular file, and exists
* 2. Owned and writable only by root (or given owner)
* 3. Group ownership is given group or is non-group writable
*
* Returns: -2 if file does not exist,
* -1 if security test failure
* 0 otherwise
*/
int
_secure_path(const char *path, uid_t uid, gid_t gid)
{
int r = -1;
struct stat sb;
const char *msg = NULL;
if (lstat(path, &sb) < 0) {
if (errno == ENOENT) /* special case */
r = -2; /* if it is just missing, skip the log entry */
else
msg = "%s: cannot stat %s: %m";
}
else if (!S_ISREG(sb.st_mode))
msg = "%s: %s is not a regular file";
else if (sb.st_mode & S_IWOTH)
msg = "%s: %s is world writable";
else if ((int)uid != -1 && sb.st_uid != uid && sb.st_uid != 0) {
if (uid == 0)
msg = "%s: %s is not owned by root";
else
msg = "%s: %s is not owned by uid %d";
} else if ((int)gid != -1 && sb.st_gid != gid && (sb.st_mode & S_IWGRP))
msg = "%s: %s is group writeable by non-authorised groups";
else
r = 0;
if (msg != NULL)
syslog(LOG_ERR, msg, "_secure_path", path, uid);
return r;
}

tools/libutil/auth.c Normal file
View File

@@ -0,0 +1,44 @@
/*
* Simple authentication database handling code.
*
* Copyright (c) 1998
* Jordan Hubbard. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* verbatim and that no modifications are made prior to this
* point in the file.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR HIS PETS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, LIFE OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <libutil.h>
char *
auth_getval(const char *name)
{
(void)name;
return (NULL);
}

View File

@@ -0,0 +1,87 @@
.\" Copyright (c) 2007 Eric Anderson <anderson@FreeBSD.org>
.\" Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd August 15, 2010
.Dt EXPAND_NUMBER 3
.Os
.Sh NAME
.Nm expand_number
.Nd format a number from human readable form
.Sh LIBRARY
.Lb libutil
.Sh SYNOPSIS
.In libutil.h
.Ft int
.Fo expand_number
.Fa "const char *buf" "uint64_t *num"
.Fc
.Sh DESCRIPTION
The
.Fn expand_number
function unformats the
.Fa buf
string and stores an unsigned 64-bit quantity at the address pointed to by the
.Fa num
argument.
.Pp
The
.Fn expand_number
function
is case-insensitive and
follows the SI power of two convention.
.Pp
The prefixes are:
.Bl -column "Prefix" "Description" "1000000000000000000" -offset indent
.It Sy "Prefix" Ta Sy "Description" Ta Sy "Multiplier"
.It Li K Ta No kilo Ta 1024
.It Li M Ta No mega Ta 1048576
.It Li G Ta No giga Ta 1073741824
.It Li T Ta No tera Ta 1099511627776
.It Li P Ta No peta Ta 1125899906842624
.It Li E Ta No exa Ta 1152921504606846976
.El
.Sh RETURN VALUES
.Rv -std
.Sh ERRORS
The
.Fn expand_number
function will fail if:
.Bl -tag -width Er
.It Bq Er EINVAL
The given string contains no digits.
.It Bq Er EINVAL
An unrecognized prefix was given.
.It Bq Er ERANGE
The result does not fit into 64 bits.
.El
.Sh SEE ALSO
.Xr humanize_number 3
.Sh HISTORY
The
.Fn expand_number
function first appeared in
.Fx 6.3 .
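
A minimal usage sketch (not part of this commit) showing how expand_number(3) turns a human-readable size string such as "512K" or "2g" into a byte count. The file name and the `cc size.c -lutil` build line are assumptions for illustration only.

```c
/* size.c — hypothetical example; build with: cc size.c -lutil */
#include <err.h>
#include <inttypes.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	uint64_t bytes;

	if (argc != 2)
		errx(1, "usage: %s <size, e.g. 512K or 2g>", argv[0]);

	/* expand_number() returns 0 on success, -1 (with errno set) on error. */
	if (expand_number(argv[1], &bytes) == -1)
		err(1, "expand_number(\"%s\")", argv[1]);

	printf("%s = %" PRIu64 " bytes\n", argv[1], bytes);
	return (0);
}
```

Running `./size 2g` would print `2g = 2147483648 bytes`, matching the power-of-two multipliers in the table above.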

93
tools/libutil/expand_number.c Normal file
View File

@ -0,0 +1,93 @@
/*-
* Copyright (c) 2007 Eric Anderson <anderson@FreeBSD.org>
* Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <libutil.h>
#include <stdint.h>
int
expand_number(const char *buf, uint64_t *num)
{
char *endptr;
uintmax_t umaxval;
uint64_t number;
unsigned shift;
int serrno;
serrno = errno;
errno = 0;
umaxval = strtoumax(buf, &endptr, 0);
if (umaxval > UINT64_MAX)
errno = ERANGE;
if (errno != 0)
return (-1);
errno = serrno;
number = umaxval;
switch (tolower((unsigned char)*endptr)) {
case 'e':
shift = 60;
break;
case 'p':
shift = 50;
break;
case 't':
shift = 40;
break;
case 'g':
shift = 30;
break;
case 'm':
shift = 20;
break;
case 'k':
shift = 10;
break;
case 'b':
case '\0': /* No unit. */
*num = number;
return (0);
default:
/* Unrecognized unit. */
errno = EINVAL;
return (-1);
}
if ((number << shift) >> shift != number) {
/* Overflow */
errno = ERANGE;
return (-1);
}
*num = number << shift;
return (0);
}
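
A standalone sketch (not part of the commit) of the overflow guard used in the implementation above: a value survives `<< shift >> shift` unchanged only when multiplying it by 2^shift still fits in 64 bits, so the round-trip comparison detects overflow without wider arithmetic.

```c
/* overflow_guard.c — illustration only, values chosen for the 'k' (2^10) case */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	unsigned shift = 10;			/* 'k' suffix: multiply by 1024 */
	uint64_t fits = UINT64_C(1) << 53;	/* 2^53 * 2^10 = 2^63, still fits */
	uint64_t toobig = UINT64_C(1) << 54;	/* 2^54 * 2^10 = 2^64, overflows */

	assert(((fits << shift) >> shift) == fits);	/* guard accepts */
	assert(((toobig << shift) >> shift) != toobig);	/* guard rejects */
	printf("overflow guard behaves as expected\n");
	return (0);
}
```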

101
tools/libutil/flopen.3 Normal file
View File

@ -0,0 +1,101 @@
.\"-
.\" Copyright (c) 2007 Dag-Erling Coïdan Smørgrav
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd June 6, 2009
.Dt FLOPEN 3
.Os
.Sh NAME
.Nm flopen
.Nd "Reliably open and lock a file"
.Sh LIBRARY
.Lb libutil
.Sh SYNOPSIS
.In sys/fcntl.h
.In libutil.h
.Ft int
.Fn flopen "const char *path" "int flags"
.Ft int
.Fn flopen "const char *path" "int flags" "mode_t mode"
.Sh DESCRIPTION
The
.Fn flopen
function opens or creates a file and acquires an exclusive lock on it.
It is essentially equivalent to calling
.Fn open
with the same parameters followed by
.Fn flock
with an
.Va operation
argument of
.Dv LOCK_EX ,
except that
.Fn flopen
will attempt to detect and handle races that may occur between opening
or creating the file and locking it.
Thus, it is well suited for opening lock files, PID files, spool
files, mailboxes and other kinds of files which are used for
synchronization between processes.
.Pp
If
.Va flags
includes
.Dv O_NONBLOCK
and the file is already locked,
.Fn flopen
will fail and set
.Va errno
to
.Dv EWOULDBLOCK .
.Pp
As with
.Fn open ,
the additional
.Va mode
argument is required if
.Va flags
includes
.Dv O_CREAT .
.Sh RETURN VALUES
If successful,
.Fn flopen
returns a valid file descriptor.
Otherwise, it returns -1, and sets
.Va errno
as described in
.Xr flock 2
and
.Xr open 2 .
.Sh SEE ALSO
.Xr errno 2 ,
.Xr flock 2 ,
.Xr open 2
.Sh AUTHORS
.An -nosplit
The
.Nm
function and this manual page were written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
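
A hypothetical sketch (not part of this commit) of the PID-file pattern the man page describes: open-or-create the file and take an exclusive lock in one call, failing immediately with EWOULDBLOCK if another instance already holds it. The `/tmp/example.pid` path is an assumption; build with `-lutil`.

```c
/* pidlock.c — illustration only; cc pidlock.c -lutil */
#include <sys/fcntl.h>
#include <err.h>
#include <errno.h>
#include <libutil.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/tmp/example.pid";	/* assumed lock-file path */
	int fd;

	/* O_NONBLOCK: fail with EWOULDBLOCK instead of waiting for the lock. */
	fd = flopen(path, O_RDWR | O_CREAT | O_NONBLOCK, 0600);
	if (fd == -1) {
		if (errno == EWOULDBLOCK)
			errx(1, "another instance already holds %s", path);
		err(1, "flopen(%s)", path);
	}

	/* The lock is held as long as the descriptor stays open. */
	dprintf(fd, "%d\n", (int)getpid());
	pause();	/* keep the descriptor (and lock) until interrupted */
	return (0);
}
```

Note that closing the descriptor, or exiting, releases the lock, so the descriptor is deliberately kept open for the lifetime of the process.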

Some files were not shown because too many files have changed in this diff.