]> www.infradead.org Git - users/dhowells/kafs-utils.git/commitdiff
midlayer: VL server querying/caching services
authorDavid Howells <dhowells@redhat.com>
Fri, 11 Oct 2019 14:35:46 +0000 (15:35 +0100)
committerDavid Howells <dhowells@redhat.com>
Fri, 5 May 2023 10:53:26 +0000 (11:53 +0100)
Add a couple of layers for VLDB querying and management.

The lower layer wraps collections of similar VL querying RPC calls, that
work out which variant to call and extract the retrieved data out into a
common data type.  Such functions include:

kafs_VL_GetAddrs_by_uuid()
kafs_VL_GetAddrs_by_addr()
kafs_VL_GetAddrs_by_index()
kafs_VL_GetEntryByName()
kafs_VL_ListAttributes()

On top of that is a middle layer that may call sequences of the above and
add caching of the results:

kafs_open_vl_service()
kafs_probe_vl_service()
kafs_look_up_volume_by_name()
kafs_look_up_volumes_by_attributes()
kafs_look_up_fileserver_by_uuid()
kafs_look_up_fileserver_by_addr()
kafs_look_up_fileserver_by_index()
kafs_map_volume_locations()

Signed-off-by: David Howells <dhowells@redhat.com>
kafs/Makefile
kafs/kafs.H
kafs/vl_fileservers.C [new file with mode: 0644]
kafs/vl_probe.C [new file with mode: 0644]
kafs/vl_volumes.C [new file with mode: 0644]
kafs/vlservice.H [new file with mode: 0644]

index 3dca36271127ec827293603fa701140bf1514455..28b9147abc575aa26ed78c15372d396276e332e7 100644 (file)
@@ -8,7 +8,10 @@ CORE_SRCS := \
        arg_completion.C \
        arg_parse.C \
        display_error.C \
-       misc.C
+       misc.C \
+       vl_fileservers.C \
+       vl_probe.C \
+       vl_volumes.C
 
 BOS_SRCS := \
        bos.C \
index 3011c181a859fed5db7dd607439492d4edb0c56e..2a12dbfe93b97f94f8cbf84c04bb333d3d81cc88 100644 (file)
@@ -26,6 +26,9 @@ namespace afs {
 class opr_time;
 constexpr unsigned int FS_PORT         = 7000; /* AFS file server port */
 constexpr unsigned int FS_SERVICE      = 1;    /* AFS File Service ID */
+constexpr unsigned int VL_PORT         = 7003; /* Volume location service port */
+constexpr unsigned int VL_SERVICE      = 52;   /* Service ID for the AFS Volume Location service */
+constexpr unsigned int YFS_VL_SERVICE  = 2503; /* Service ID for AuriStor upgraded VL service */
 }
 
 typedef unsigned long long Volume_id;
@@ -53,6 +56,26 @@ public:
        inline time_t seconds() const { return ts.tv_sec; }
 };
 
+/*
+ * Fileserver site
+ */
+class FS_site : public rxrpc::refcount {
+public:
+       rxrpc::Uuid                     uuid;
+       bool                            inited:1;       /* T if initialised */
+       bool                            has_uuid:1;
+       std::string                     name;           /* Canonical server name */
+       std::vector<sockaddr_rxrpc>     fs_addrs;       /* Fileserver addresses */
+       std::vector<sockaddr_rxrpc>     vs_addrs;       /* Volume server addresses */
+
+       FS_site()
+               {
+                       inited = false;
+                       has_uuid = false;
+               }
+       ~FS_site();
+};
+
 class Context : public rxrpc::refcount {
 public:
        rxrpc::ref<rxrpc::Endpoint> endpoint;
@@ -62,6 +85,8 @@ public:
        struct kafs_cell        *cell_db;
        bool                    no_resolve;     /* Don't resolve addresses for display */
 
+       std::vector<rxrpc::ref<FS_site>> fs_sites; /* Fileserver sites */
+
        rxrpc::security_auth_level sec_level;   /* Security level */
 
        Context();
diff --git a/kafs/vl_fileservers.C b/kafs/vl_fileservers.C
new file mode 100644 (file)
index 0000000..445eda7
--- /dev/null
@@ -0,0 +1,489 @@
+/* Fileserver/volumeserver record lookup and caching.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <iostream>
+#include <fmt/core.h>
+#include <netdb.h>
+#include <arpa/inet.h>
+#include "vlservice.H"
+#include "afs_xg.H"
+
+using rxrpc::ref;
+using kafs::VL_service;
+using kafs::FS_site;
+using kafs::afs::afsUUID;
+
+namespace kafs {
+template<class T>
+static ref<FS_site> cache_fileserver(Context *ctx,
+                                    const T &a,
+                                    ref<FS_site> &candidate,
+                                    bool (*test)(const T &a, const FS_site *b));
+static void addrs_to_addrlist(std::vector<uint32_t> addrlist,
+                             unsigned short port,
+                             unsigned short service,
+                             std::vector<sockaddr_rxrpc> &alist);
+static void endpoints_to_addrlist(std::vector<afs::endpoint> &eplist,
+                                 unsigned short service,
+                                 std::vector<sockaddr_rxrpc> &alist);
+}
+
+FS_site::~FS_site()
+{
+}
+
+static bool kafs_cmp_fs_uuid(const rxrpc::Uuid &a, const FS_site *b)
+{
+       return uuid_compare(a.uuid, b->uuid.uuid) == 0;
+}
+
+static bool kafs_cmp_fs_addr(const struct sockaddr_rxrpc &a, const FS_site *b)
+{
+       const struct sockaddr_rxrpc *p;
+       unsigned int i;
+
+       for (i = 0; i < b->fs_addrs.size(); i++) {
+               p = &b->fs_addrs[i];
+               if (p->transport.family != a.transport.family)
+                       continue;
+               switch (a.transport.family) {
+               case AF_INET:
+                       if (p->transport.sin.sin_port == a.transport.sin.sin_port &&
+                           p->transport.sin.sin_addr.s_addr == a.transport.sin.sin_addr.s_addr)
+                               return true;
+                       continue;
+               case AF_INET6:
+                       if (p->transport.sin6.sin6_port != a.transport.sin6.sin6_port)
+                               continue;
+                       if (memcmp(&p->transport.sin6.sin6_addr,
+                                  &a.transport.sin6.sin6_addr,
+                                  16) == 0)
+                               return true;
+                       continue;
+               default:
+                       continue;
+               }
+       }
+       return false;
+}
+
+static bool kafs_cmp_fs(const ref<FS_site> &a, const FS_site *b)
+{
+       unsigned int i;
+
+       if (a->has_uuid && b->has_uuid && kafs_cmp_fs_uuid(a->uuid, b))
+               return true;
+
+       for (i = 0; i < a->fs_addrs.size(); i++)
+               if (kafs_cmp_fs_addr(a->fs_addrs[i], b))
+                       return true;
+       return false;
+}
+
+/*
+ * Look up a cached fileserver record by uuid.  If one is not found, the
+ * candidate fileserver will be added into the cache.  If the candidate would
+ * be a duplicate, it is discarded.
+ */
+template<class T>
+static ref<FS_site> kafs::cache_fileserver(Context *ctx,
+                                          const T &a,
+                                          ref<FS_site> &candidate,
+                                          bool (*test)(const T &a, const FS_site *b))
+{
+       std::vector<ref<FS_site>> &fs_sites = ctx->fs_sites;
+       unsigned int i = 0;
+
+       for (i = 0; i < ctx->fs_sites.size(); i++)
+               if (test(a, fs_sites[i]))
+                       return fs_sites[i];
+
+       if (candidate)
+               fs_sites.push_back(candidate);
+       return candidate;
+}
+
+/*
+ * Convert an afsUUID object into a uuid_t.
+ */
+void kafs::afsUUID_to_uuid(rxrpc::Uuid &_uuid, const afsUUID *au)
+{
+       struct afsUUID *uuid = (afsUUID *)_uuid.uuid;
+       int j;
+
+       uuid->time_low                  = htonl(au->time_low);
+       uuid->time_mid                  = htons(au->time_mid);
+       uuid->time_hi_and_version       = htons(au->time_hi_and_version);
+       uuid->clock_seq_hi_and_reserved = au->clock_seq_hi_and_reserved;
+       uuid->clock_seq_low             = au->clock_seq_low;
+       for (j = 0; j < 6; j++)
+               uuid->node[j] = au->node[j];
+}
+
+/*
+ * Convert a uuid_t into an afsUUID object.
+ */
+void kafs::uuid_to_afsUUID(struct afsUUID *au, const rxrpc::Uuid &_uuid)
+{
+       const struct afsUUID *uuid = (const struct afsUUID *)_uuid.uuid;
+       int j;
+
+       au->time_low                    = ntohl(uuid->time_low);
+       au->time_mid                    = ntohs(uuid->time_mid);
+       au->time_hi_and_version         = ntohs(uuid->time_hi_and_version);
+       au->clock_seq_hi_and_reserved   = uuid->clock_seq_hi_and_reserved;
+       au->clock_seq_low               = uuid->clock_seq_low;
+       for (j = 0; j < 6; j++)
+               au->node[j] = uuid->node[j];
+}
+
+/*
+ * Convert a list of IPv4 addresses into an address list.
+ */
+static void kafs::addrs_to_addrlist(std::vector<uint32_t> addrlist,
+                                   unsigned short port,
+                                   unsigned short service,
+                                   std::vector<sockaddr_rxrpc> &alist)
+{
+       size_t i;
+
+       alist.resize(addrlist.size());
+
+       for (i = 0; i < addrlist.size(); i++) {
+               alist[i].srx_family = AF_RXRPC;
+               alist[i].srx_service = service;
+               alist[i].transport_type = SOCK_DGRAM;
+               alist[i].transport_len = sizeof(alist[i].transport.sin);
+               alist[i].transport.sin.sin_family = AF_INET;
+               alist[i].transport.sin.sin_port = htons(port);
+               alist[i].transport.sin.sin_addr.s_addr = htonl(addrlist[i]);
+       }
+}
+
+/*
+ * Convert a list of endpoints into an address list.
+ */
+static void kafs::endpoints_to_addrlist(std::vector<afs::endpoint> &eplist,
+                                       unsigned short service,
+                                       std::vector<sockaddr_rxrpc> &alist)
+{
+       unsigned int x[5];
+       size_t i;
+
+       alist.resize(eplist.size());
+
+       for (i = 0; i < eplist.size(); i++) {
+               struct afs::endpoint *ep = &eplist[i];
+
+               alist[i].srx_family = AF_RXRPC;
+               alist[i].srx_service = service;
+               alist[i].transport_type = SOCK_DGRAM;
+
+               switch (ep->Type) {
+               case afs::ENDPOINT_IPV4:
+                       if (ep->Data.size() != 8)
+                               throw std::runtime_error(
+                                       fmt::format("Malformed IPv4 Endpoint (size={:d})",
+                                                   ep->Data.size()));
+                       memcpy(x, ep->Data.buffer, 8);
+                       alist[i].transport_len = sizeof(alist[i].transport.sin);
+                       alist[i].transport.sin.sin_family = AF_INET;
+                       alist[i].transport.sin.sin_port = htons(ntohl(x[1]));
+                       memcpy(&alist[i].transport.sin.sin_addr, x, 4);
+                       break;
+               case afs::ENDPOINT_IPV6:
+                       if (ep->Data.size() != 20)
+                               throw std::runtime_error("Malformed IPv6 Endpoint");
+                       memcpy(x, ep->Data.buffer, 20);
+                       alist[i].transport_len = sizeof(alist[i].transport.sin6);
+                       alist[i].transport.sin6.sin6_family = AF_INET6;
+                       alist[i].transport.sin6.sin6_port = htons(ntohl(x[4]));
+                       memcpy(&alist[i].transport.sin6.sin6_addr, x, 16);
+                       break;
+               default:
+                       throw std::runtime_error(
+                               fmt::format("Unsupported Endpoint type ({:d})",
+                                           ep->Type));
+               }
+       }
+}
+
+/**
+ * VL_service::VL_GetAddrs_by_uuid - Query the VLDB for a server by UUID
+ * @uuid: The UUID of the file/volume server
+ *
+ * Query the Volume Location service for a fileserver record by UUID.  The server record may
+ * be retrieved from the cache, and if not, the result will be cached.
+ *
+ */
+ref<FS_site> VL_service::VL_GetAddrs_by_uuid(const rxrpc::Uuid &uuid)
+{
+       ref<FS_site> site;
+       afs::ListAddrByAttributes laba;
+       afs::yfsServerAttributes filter;
+       std::vector<afs::endpoint> fsEndpoints, volEndpoints;
+       std::vector<uint32_t> blkaddrs;
+       afsUUID a_uuid;
+       int32_t uniquifier, nentries;
+
+       site = new FS_site;
+
+       /* Start off by trying the YFS VL.GetEndpoints RPC */
+       if (vl_service != service_yfs)
+               goto fallback_getaddrsu;
+
+       filter.type = afs::YFS_SERVER_UUID;
+       memcpy(&filter.uuid, uuid.uuid, sizeof(filter.uuid));
+
+       afs::YFSVL::GetEndpoints(&vl_params, filter, site->uuid, uniquifier,
+                                fsEndpoints, volEndpoints);
+
+       endpoints_to_addrlist(fsEndpoints, afs::FS_SERVICE, site->fs_addrs);
+       endpoints_to_addrlist(volEndpoints, afs::VOLSERVICE_ID, site->vs_addrs);
+
+       site->inited = true;
+       return site;
+
+fallback_getaddrsu:
+       /* Fall back to the AFS VL.GetAddrsU RPC */
+       memset(&laba, 0, sizeof(laba));
+       laba.Mask = afs::VLADDR_UUID;
+       uuid_to_afsUUID(&laba.uuid, uuid);
+
+       afs::VL::GetAddrsU(&vl_params, laba, a_uuid, uniquifier, nentries, blkaddrs);
+
+       afsUUID_to_uuid(site->uuid, &a_uuid);
+       addrs_to_addrlist(blkaddrs, afs::FS_PORT, afs::FS_SERVICE, site->fs_addrs);
+       addrs_to_addrlist(blkaddrs, afs::VOLSERVICE_PORT, afs::VOLSERVICE_ID, site->vs_addrs);
+       site->inited = true;
+       return site;
+}
+
+/**
+ * VL_service::VL_GetAddrs_by_addr - Query the VLDB for a server by address
+ * @addr: One of the server's endpoint addresses.
+ *
+ * Query the Volume Location service for a fileserver record by the address of one of the
+ * fileserver endpoints.
+ */
+ref<FS_site> VL_service::VL_GetAddrs_by_addr(const struct sockaddr_rxrpc &srx)
+{
+       ref<FS_site> site;
+       afs::ListAddrByAttributes laba;
+       afs::yfsServerAttributes filter;
+       std::vector<afs::endpoint> fsEndpoints, volEndpoints;
+       std::vector<uint32_t> blkaddrs;
+       afsUUID a_uuid;
+       unsigned int port;
+       uint32_t endpoint[5];
+       int32_t uniquifier, nentries;
+
+       site = new FS_site;
+
+       /* Start off by trying the YFS VL.GetEndpoints RPC */
+       if (vl_service != service_yfs)
+               goto fallback_getaddrsu;
+
+       filter.type = afs::YFS_SERVER_ENDPOINT;
+
+       switch (srx.transport.family) {
+       case AF_INET:
+               memcpy(endpoint, &srx.transport.sin.sin_addr, 4);
+               port = ntohs(srx.transport.sin.sin_port);
+               if (port == 0)
+                       port = afs::FS_PORT;
+               endpoint[1] = htonl(port);
+               filter.endpoint.Type = afs::ENDPOINT_IPV4;
+               filter.endpoint.Data.buffer = endpoint;
+               filter.endpoint.Data.buffer_size = 4 + 4;
+               break;
+       case AF_INET6:
+               memcpy(endpoint, &srx.transport.sin6.sin6_addr, 16);
+               port = ntohs(srx.transport.sin6.sin6_port);
+               if (port == 0)
+                       port = afs::FS_PORT;
+               endpoint[4] = htonl(port);
+               filter.endpoint.Type = afs::ENDPOINT_IPV6;
+               filter.endpoint.Data.buffer = endpoint;
+               filter.endpoint.Data.buffer_size = 16 + 4;
+               break;
+       default:
+               throw(fmt::format("Address family {:d} not supported\n",
+                                 srx.transport.family));
+       }
+
+       afs::YFSVL::GetEndpoints(&vl_params, filter, site->uuid, uniquifier,
+                                fsEndpoints, volEndpoints);
+
+       endpoints_to_addrlist(fsEndpoints, afs::FS_SERVICE, site->fs_addrs);
+       endpoints_to_addrlist(volEndpoints, afs::VOLSERVICE_ID, site->vs_addrs);
+       site->inited = true;
+       return site;
+
+fallback_getaddrsu:
+       /* Fall back to the AFS VL.GetAddrsU RPC */
+       memset(&laba, 0, sizeof(laba));
+       switch (srx.transport.family) {
+       case AF_INET:
+               laba.Mask = afs::VLADDR_IPADDR;
+               memcpy(&laba.ipaddr, &srx.transport.sin.sin_addr, 4);
+               break;
+       default:
+               throw std::runtime_error(fmt::format("Address family {:d} not supported\n",
+                                                    srx.transport.family));
+       }
+
+       afs::VL::GetAddrsU(&vl_params, laba, a_uuid, uniquifier, nentries, blkaddrs);
+
+       afsUUID_to_uuid(site->uuid, &a_uuid);
+       addrs_to_addrlist(blkaddrs, afs::FS_PORT, afs::FS_SERVICE, site->fs_addrs);
+       addrs_to_addrlist(blkaddrs, afs::VOLSERVICE_PORT, afs::VOLSERVICE_ID, site->vs_addrs);
+       site->inited = true;
+       return site;
+}
+
+/**
+ * VL_service::VL_GetAddrs_by_index - Query the VLDB for a server by index
+ * @server_index: The index number of the server within the cell
+ *
+ * Query the Volume Location service for a fileserver record by index.
+ */
+ref<FS_site> VL_service::VL_GetAddrs_by_index(int server_index)
+{
+       ref<FS_site> site;
+       afs::ListAddrByAttributes laba;
+       afs::yfsServerAttributes filter;
+       std::vector<afs::endpoint> fsEndpoints, volEndpoints;
+       std::vector<uint32_t> blkaddrs;
+       struct afsUUID a_uuid;
+       int32_t uniquifier, nentries;
+
+       site = new FS_site;
+
+       /* Start off by trying the YFS VL.GetEndpoints RPC */
+       if (vl_service != service_yfs)
+               goto fallback_getaddrsu;
+
+       filter.type = afs::YFS_SERVER_INDEX;
+       filter.index = server_index;
+
+       afs::YFSVL::GetEndpoints(&vl_params, filter, site->uuid, uniquifier,
+                                fsEndpoints, volEndpoints);
+
+       endpoints_to_addrlist(fsEndpoints, afs::FS_SERVICE, site->fs_addrs);
+       endpoints_to_addrlist(volEndpoints, afs::VOLSERVICE_ID, site->vs_addrs);
+       site->inited = true;
+       return site;
+
+fallback_getaddrsu:
+       /* Fall back to the AFS VL.GetAddrsU RPC */
+       memset(&laba, 0, sizeof(laba));
+       laba.Mask = afs::VLADDR_INDEX;
+       laba.index = server_index;
+
+       afs::VL::GetAddrsU(&vl_params, laba, a_uuid, uniquifier, nentries, blkaddrs);
+
+       afsUUID_to_uuid(site->uuid, &a_uuid);
+       addrs_to_addrlist(blkaddrs, afs::FS_PORT, afs::FS_SERVICE, site->fs_addrs);
+       addrs_to_addrlist(blkaddrs, afs::VOLSERVICE_PORT, afs::VOLSERVICE_ID, site->vs_addrs);
+       site->inited = true;
+       return site;
+}
+
+/**
+ * VL_service::look_up_fileserver_by_uuid - Get a server by UUID
+ * @uuid: The UUID of the file/volume server
+ *
+ * Look up a server record by UUID.  The server record may be retrieved from
+ * the cache, and if not, the result will be cached.
+ */
+ref<FS_site> VL_service::look_up_fileserver_by_uuid(const rxrpc::Uuid &uuid)
+{
+       ref<FS_site> site;
+
+       site = cache_fileserver(vl_context, uuid, site, kafs_cmp_fs_uuid);
+       if (site)
+               return site;
+
+       site = VL_GetAddrs_by_uuid(uuid);
+       return cache_fileserver(vl_context, site, site, kafs_cmp_fs);
+}
+
+/**
+ * VL_service::look_up_fileserver_by_addr - Get a server by fileserver endpoint address
+ * @addr: One of the server's endpoint addresses.
+ *
+ * Look up a server record by the address of one of the fileserver endpoints.
+ * The server record may be retrieved from the cache, and if not, the result
+ * will be cached.
+ */
+ref<FS_site> VL_service::look_up_fileserver_by_addr(const struct sockaddr_rxrpc &addr)
+{
+       ref<FS_site> site;
+
+       site = cache_fileserver(vl_context, addr, site, kafs_cmp_fs_addr);
+       if (site)
+               return site;
+
+       site = VL_GetAddrs_by_addr(addr);
+       return cache_fileserver(vl_context, site, site, kafs_cmp_fs);
+}
+
+/**
+ * VL_service::look_up_fileserver_by_index - Get a fileserver by index
+ * @server_index: The index number of the server within the cell
+ *
+ * Look up a fileserver record by index.  The server record is cached.
+ */
+ref<FS_site> VL_service::look_up_fileserver_by_index(int server_index)
+{
+       ref<FS_site> site = VL_GetAddrs_by_index(server_index);
+
+       return cache_fileserver(vl_context, site, site, kafs_cmp_fs);
+}
+
+/**
+ * VL_service::map_volume_sites - Map volume sites to servers
+ * @vldb: The volume location entry
+ *
+ * Map the locations of a group of volumes, as specified by @vldb, to the file
+ * and volume servers where the data is stored.
+ */
+void VL_service::map_volume_sites(Vldb_entry *vldb)
+{
+       struct sockaddr_rxrpc srx;
+       unsigned int i;
+
+       _enter("%s", vldb->name.c_str());
+
+       for (i = 0; i < vldb->sites.size(); i++) {
+               Vldb_site &site = vldb->sites[i];
+
+               if (site.fs_site)
+                       continue;
+
+               if (site.has_uuid) {
+                       site.fs_site = look_up_fileserver_by_uuid(site.uuid);
+
+               } else if (site.has_addr_number) {
+                       try {
+                               memset(&srx, 0, sizeof(srx));
+                               srx.transport.family = AF_INET;
+                               memcpy(&srx.transport.sin.sin_addr, &site.addr_number, 4);
+                               site.fs_site = VL_GetAddrs_by_addr(srx);
+                       } catch (const rxrpc::rx_abort &a) {
+                               std::cerr << "VL_service::map_volume_sites: abort "
+                                         << a.what() << "\n";
+                               continue;
+                       }
+               }
+       }
+}
diff --git a/kafs/vl_probe.C b/kafs/vl_probe.C
new file mode 100644 (file)
index 0000000..162bded
--- /dev/null
@@ -0,0 +1,196 @@
+/* Volume Location server probe.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <fmt/core.h>
+#include "vlservice.H"
+#include "afs_xg.H"
+
+using rxrpc::ref;
+using kafs::VL_service;
+
+/**
+ * VL_service::VL_service - Open a Volume Location service for access.
+ * @ctx: The cell and authentication context
+ *
+ * Open the Volume Location service of the cell defined by the context for
+ * access.
+ */
+VL_service::VL_service(Context *ctx)
+{
+       vl_context      = ctx;
+       vl_server       = NULL;
+       vl_addr         = 0;
+       vl_service      = service_afs;
+       memset(vl_caps, 0, sizeof(vl_caps));
+
+       if (!ctx->cell_db)
+               throw std::invalid_argument("Unconfigured cell");
+
+       open_endpoint(ctx);
+       probe_vl_service();
+}
+
+/**
+ * VL_service::~VL_service - Destroy an open VL service
+ */
+VL_service::~VL_service()
+{
+}
+
+/**
+ * VL_service::probe_vl_service - Probe a Volume Location service for capabilities
+ * @ctx: The cell and authentication context
+ *
+ * Probe the VL service to find which servers are accessible, what they support
+ * and what variety of server is there.
+ */
+void VL_service::probe_vl_service()
+{
+       struct kafs_cell *cell = vl_context->cell_db;
+       struct kafs_server_addr *addrs, *addr;
+       struct kafs_server_list *vlservers = cell->vlservers;
+       struct kafs_server *servers, *server;
+       std::vector<unsigned int> caps;
+       std::error_code saved_error;
+       unsigned char *map;
+       unsigned int i, pref, nr_caps;
+
+       _enter("");
+
+       if (!vlservers || vlservers->nr_servers == 0)
+               throw std::runtime_error("Cell had no VL servers");
+
+       map = (unsigned char *)alloca(vlservers->nr_servers);
+       memset(map, 0, vlservers->nr_servers);
+       servers = vlservers->servers;
+
+try_next_server:
+       _debug("next server");
+       server = NULL;
+       pref = UINT_MAX;
+       for (i = 0; i < vlservers->nr_servers; i++) {
+               if (map[i])
+                       continue;
+               if (!server || servers[i].pref < pref) {
+                       pref = servers[i].pref;
+                       server = &servers[i];
+               }
+       }
+
+       if (!server) {
+               if (saved_error)
+                       throw std::system_error(saved_error, "No VL servers available");
+               throw std::runtime_error("No VL servers available");
+       }
+
+       addrs = server->addrs;
+       if (!addrs)
+               throw std::runtime_error("VL server has no addresses");
+       addr = &addrs[0];
+
+try_next_address:
+       _debug("addr %lu", addrs - addr);
+       memset(&vl_params.peer, 0, sizeof(vl_params.peer));
+       vl_params.endpoint              = vl_context->endpoint;
+       vl_params.security              = vl_context->security;
+       vl_params.peer.srx_family       = AF_RXRPC;
+       vl_params.peer.srx_service      = afs::VL_SERVICE;
+       vl_params.peer.transport_type   = SOCK_DGRAM;
+       vl_params.exclusive             = false;
+       vl_params.upgrade_service       = true;
+
+       switch (addr->sin.sin_family) {
+       case AF_INET:
+               memcpy(&vl_params.peer.transport, &addr->sin, sizeof(addr->sin));
+               if (!vl_params.peer.transport.sin.sin_port)
+                       vl_params.peer.transport.sin.sin_port = htons(afs::VL_PORT);
+               vl_params.peer.transport_len = sizeof(addr->sin);
+               vl_params.peer_len = sizeof(vl_params.peer);
+               break;
+       case AF_INET6:
+               memcpy(&vl_params.peer.transport, &addr->sin6, sizeof(addr->sin6));
+               if (!vl_params.peer.transport.sin6.sin6_port)
+                       vl_params.peer.transport.sin6.sin6_port = htons(afs::VL_PORT);
+               vl_params.peer.transport_len = sizeof(addr->sin6);
+               vl_params.peer_len = sizeof(vl_params.peer);
+               break;
+       default:
+               throw std::invalid_argument("Unsupported address family");
+       }
+
+       try {
+               afs::VL::GetCapabilities(&vl_params, caps);
+       } catch (const rxrpc::AbortRXGEN_OPCODE &a) {
+               vl_service = service_afs;
+               vl_params.upgrade_service = false;
+               vl_caps[0] = 0;
+               goto out;
+       } catch (const rxrpc::rx_abort &a) {
+               throw;
+       } catch (const std::system_error &e) {
+               switch (e.code().value()) {
+               case -ENONET:
+               case -ECONNRESET: /* Responded, but call expired. */
+               case -ERFKILL:
+               case -EADDRNOTAVAIL:
+               case -ENETUNREACH:
+               case -EHOSTUNREACH:
+               case -EHOSTDOWN:
+               case -ECONNREFUSED:
+               case -ETIMEDOUT:
+               case -ETIME:
+                       saved_error.assign(e.code().value(), e.code().category());
+                       addr++;
+                       if (addr - addrs >= server->nr_addrs) {
+                               map[server - servers] = 1;
+                               goto try_next_server;
+                       }
+                       goto try_next_address;
+               default:
+                       throw;
+               }
+       }
+
+       vl_params.upgrade_service = false;
+       vl_server = server;
+       vl_addr = addr - addrs;
+       switch (vl_params.service_id_used) {
+       case afs::VL_SERVICE:
+               vl_service = service_afs;
+               break;
+       case afs::YFS_VL_SERVICE:
+               vl_service = service_yfs;
+               vl_params.peer.srx_service = afs::YFS_VL_SERVICE;
+               break;
+       default:
+               throw std::runtime_error(
+                       fmt::format("Upgraded to unsupported VL service {:d}",
+                                   vl_params.service_id_used));
+       }
+
+       memset(&vl_caps, 0, sizeof(vl_caps));
+       nr_caps = caps.size();
+       if (nr_caps > sizeof(vl_caps) / sizeof(vl_caps))
+               nr_caps = sizeof(vl_caps) / sizeof(vl_caps);
+       for (i = 0; i < nr_caps; i++)
+               vl_caps[i] = caps[i];
+
+out:
+       if (debug_caps) {
+               printf("vl-caps:");
+               for (i = 0; i < sizeof(vl_caps) / sizeof(unsigned int); i++)
+                       printf(" %08x", vl_caps[i]);
+               if (vl_service == service_yfs)
+                       printf(" yfs\n");
+               else
+                       printf(" afs\n");
+       }
+}
diff --git a/kafs/vl_volumes.C b/kafs/vl_volumes.C
new file mode 100644 (file)
index 0000000..4ca2d78
--- /dev/null
@@ -0,0 +1,358 @@
+/* Volume Location handling
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <iostream>
+#include "vlservice.H"
+#include "afs_xg.H"
+
+using rxrpc::ref;
+using kafs::VL_service;
+
+namespace kafs {
+static void nentries_to_vldb_entries(std::vector<afs::nvldbentry> &entries,
+                                    Vldb_entry_list &vlist);
+static void uentries_to_vldb_entries(std::vector<afs::uvldbentry> &entries,
+                                    Vldb_entry_list &vlist);
+static void entries64_to_volumes(std::vector<afs::vldbentry64> &entries,
+                                Vldb_entry_list &vlist);
+}
+
+/*
+ * Initialise a site record.
+ */
+kafs::Vldb_site::Vldb_site()
+{
+       unique = 0;
+       flags = 0;
+       nr_addrs = 0;
+       addr_number = 0;
+       has_uuid = false;
+       has_unique = false;
+       has_addr_number = false;
+}
+
+/*
+ * Initialise a volume record.
+ */
+kafs::Vldb_entry::Vldb_entry(size_t nr_sites)
+{
+       name[0] = 0;
+       memset(volume_id, 0, sizeof(volume_id));
+       clone_id = 0;
+       flags = 0;
+       match_index = 0;
+       has_match_index = false;
+       sites.reserve(nr_sites);
+}
+
+/*
+ * Transcribe the information from an array of nvldbentry records into
+ * additional records in a volume list.
+ */
+static void kafs::nentries_to_vldb_entries(std::vector<afs::nvldbentry> &entries,
+                                          Vldb_entry_list &vlist)
+{
+       unsigned int i, j;
+
+       vlist.reserve(vlist.size() + entries.size());
+
+       for (i = 0; i < entries.size(); i++) {
+               afs::nvldbentry &entry = entries[i];
+
+               ref<Vldb_entry> vldb = new Vldb_entry(afs::NMAXNSERVERS);
+
+               vldb->name              = entry.name;
+               vldb->clone_id          = entry.cloneId;
+               vldb->flags             = entry.flags;
+               vldb->match_index       = entry.matchindex;
+               vldb->has_match_index   = true;
+
+               for (j = 0; j < afs::MAXTYPES; j++)
+                       vldb->volume_id[j] = entry.volumeId[j];
+
+               for (j = 0; j < afs::NMAXNSERVERS; j++) {
+                       vldb->sites.resize(vldb->sites.size() + 1);
+                       Vldb_site &site = vldb->sites[j];
+                       site.partition.id       = entry.serverPartition[j];
+                       site.flags              = entry.serverFlags[j];
+                       site.addr_number        = htonl(entry.serverNumber[j]);
+                       site.has_addr_number    = true;
+               }
+
+               vlist.push_back(vldb);
+       }
+}
+
+/*
+ * Transcribe the information from an array of uvldbentry records into
+ * additional records in a volume list.
+ */
+static void kafs::uentries_to_vldb_entries(std::vector<afs::uvldbentry> &entries,
+                                          Vldb_entry_list &vlist)
+{
+       unsigned int i, j, s;
+
+       vlist.reserve(vlist.size() + entries.size());
+
+       for (i = 0; i < entries.size(); i++) {
+               afs::uvldbentry &entry = entries[i];
+
+               ref<Vldb_entry> vldb = new Vldb_entry(afs::NMAXNSERVERS);
+
+               vldb->name              = entry.name;
+               vldb->clone_id          = entry.cloneId;
+               vldb->flags             = entry.flags;
+               vldb->match_index       = entry.matchindex;
+               vldb->has_match_index   = true;
+
+               for (j = 0; j < afs::MAXTYPES; j++)
+                       vldb->volume_id[j] = entry.volumeId[j];
+
+               s = 0;
+               for (j = 0; j < afs::NMAXNSERVERS; j++) {
+                       if (entry.serverFlags[j] == 0)
+                               continue;
+
+                       vldb->sites.resize(vldb->sites.size() + 1);
+                       Vldb_site &site = vldb->sites[s];
+
+                       afsUUID_to_uuid(site.uuid, &entry.serverNumber[j]);
+                       site.has_uuid           = true;
+                       site.partition.id       = entry.serverPartition[j];
+                       site.flags              = entry.serverFlags[j];
+                       s++;
+               }
+
+               vlist.push_back(vldb);
+       }
+}
+
+/*
+ * Transcribe the information from an vldbentry64 record into a kafs_volume
+ * record and add it into a volume list.
+ */
+static void kafs::entries64_to_volumes(std::vector<afs::vldbentry64> &entries,
+                                      Vldb_entry_list &vlist)
+{
+       unsigned int i, j;
+
+       vlist.reserve(vlist.size() + entries.size());
+
+       for (i = 0; i < entries.size(); i++) {
+               afs::vldbentry64 &entry = entries[i];
+
+               ref<Vldb_entry> vldb = new Vldb_entry(entry.locations.size());
+
+               vldb->name      = entry.name;
+               vldb->clone_id  = entry.cloneId;
+               vldb->flags     = entry.flags;
+
+               for (j = 0; j < afs::MAXTYPES; j++)
+                       vldb->volume_id[j] = entry.volumeId[j];
+
+               vldb->sites.resize(entry.locations.size());
+               for (j = 0; j < entry.locations.size(); j++) {
+                       afs::vldbentry64_location &yel = entry.locations[j];
+                       Vldb_site &site = vldb->sites[j];
+
+                       afsUUID_to_uuid(site.uuid, &yel.uuid);
+                       site.has_uuid           = true;
+                       site.unique             = yel.unique;
+                       site.has_unique         = true;
+                       site.partition.id       = yel.partition;
+                       site.flags              = yel.flags;
+               }
+
+               vlist.push_back(vldb);
+       }
+}
+
+/**
+ * VL_service::VL_GetEntryByName - Query the VLDB for a volume by name.
+ * @name: The name or ID of the volume
+ * @vlist: The record list in which to store the volume record
+ *
+ * Query the Volume Location service to find out the details of a volume group
+ * and the list of servers on which it resides.  The volume is an ID if it
+ * begins with a numeric digit and a name otherwise.
+ *
+ * If successful, the resulting record is appended to the list.
+ */
+void VL_service::VL_GetEntryByName(const std::string &name,
+                                  Vldb_entry_list &vlist)
+{
+       std::vector<afs::vldbentry64> entries64;
+       std::vector<afs::uvldbentry> uventries;
+
+       _enter("%s", name.c_str());
+
+       if (vl_service == kafs::service_yfs) {
+               entries64.resize(1);
+               afs::YFSVL::GetEntryByName64(&vl_params, name, afs::VLGET_FS_UUID,
+                                            entries64[0]);
+               entries64_to_volumes(entries64, vlist);
+               return;
+       }
+
+       uventries.resize(1);
+       afs::VL::GetEntryByNameU(&vl_params, name, uventries[0]);
+       uentries_to_vldb_entries(uventries, vlist);
+}
+
+/**
+ * VL_service::look_up_volume_by_name - Look up a volume by name.
+ * @name: The name or ID of the volume
+ * @vlist: The record list in to which the volume record will be appended
+ *
+ * Look up a record of a volume by name to retrieve the details of a volume and
+ * the list of servers on which it resides.  The volume is an ID if it begins
+ * with a numeric digit and a name otherwise.
+ *
+ * If successful, the resulting record is appended to the list.  The record may
+ * be cached locally.
+ */
+void VL_service::look_up_volume_by_name(Volume_spec &name,
+                                       Vldb_entry_list &vlist)
+{
+       _enter("");
+
+       VL_GetEntryByName(name.name, vlist);
+       map_volume_sites(vlist[vlist.size() - 1]);
+}
+
+/**
+ * VL_service::VL_ListAttributes - Query the VLDB for a list of matching volumes.
+ * @attributes: The attributes by which to filter the volume list
+ * @volume_name_pattern: An RE that is used to filter the volume list (or "")
+ * @vlist: The record list in to which volume records will be appended
+ *
+ * Query the Volume Location service to retrieve the details of a set of
+ * volumes, as filtered by the attribute set and, optionally, a name pattern.
+ *
+ * If successful, the resulting records are appended to the list.
+ */
+void VL_service::VL_ListAttributes(afs::VldbListByAttributes &attributes,
+                                  const std::string &volume_name_pattern,
+                                  Vldb_entry_list &vlist)
+{
+       std::vector<afs::uvldbentry> uentries;
+       std::vector<afs::nvldbentry> nentries;
+       std::vector<afs::vldbentry> entries;
+       int32_t entry_count, pos, next_pos;
+
+       _enter("%x", attributes.Mask);
+
+       /* Start off by trying the YFS ListAttributesU2 RPC */
+       if (vl_service != service_yfs)
+               goto fall_back_to_LAN2;
+       try {
+               pos = 0;
+               next_pos = -1;
+               do {
+                       afs::YFSVL::ListAttributesU2(&vl_params, attributes,
+                                                    volume_name_pattern, pos,
+                                                    entry_count, uentries, next_pos);
+                       uentries_to_vldb_entries(uentries, vlist);
+                       pos = next_pos;
+               } while (next_pos != -1);
+               return;
+       } catch (const rxrpc::AbortRXGEN_OPCODE &a) {
+               goto fall_back_to_LAN2;
+       }
+
+fall_back_to_LAN2:
+       /* Fall back to the ListAttributesN2 RPC */
+       try {
+               pos = 0;
+               next_pos = -1;
+               do {
+                       afs::VL::ListAttributesN2(&vl_params, attributes,
+                                                 volume_name_pattern, pos,
+                                                 entry_count, nentries, next_pos);
+                       nentries_to_vldb_entries(nentries, vlist);
+                       pos = next_pos;
+               } while (next_pos != -1);
+               return;
+       } catch (const rxrpc::AbortRXGEN_OPCODE &a) {
+               goto fall_back_to_LAN;
+       }
+
+fall_back_to_LAN:
+       /* Fall back to the ListAttributesN RPC */
+       try {
+               afs::VL::ListAttributesN(&vl_params, attributes,entry_count, nentries);
+               nentries_to_vldb_entries(nentries, vlist);
+               return;
+       } catch (const rxrpc::AbortRXGEN_OPCODE &a) {
+               goto fall_back_to_LA;
+       }
+
+fall_back_to_LA:
+       /* Fall back to the ListAttributes RPC */
+       afs::VL::ListAttributes(&vl_params, attributes, entry_count, entries);
+}
+
+/**
+ * VL_service::look_up_volumes_by_attributes - Look up a volume by attributes and/or name pattern.
+ * @fsspec: A fileserver address filter (or NULL)
+ * @partition: A partition filter (or NULL)
+ * @locked: A filter on the volume-locked flag
+ * @volume_name_pattern: An RE filter on the volume name (or NULL)
+ * @vlist: The record list in to which the volume record will be appended
+ *
+ * Look up a record of a volume by some combination of server address,
+ * partition ID, volume locked flag and a RE volume name pattern to retrieve
+ * the details of a volume and the list of servers on which it resides.
+ *
+ * If successful, the resulting records are appended to the list.  These
+ * records may be cached locally.
+ */
+void VL_service::look_up_volumes_by_attributes(Fileserver_spec &fsspec,
+                                              Partition_spec &partition,
+                                              bool locked,
+                                              const std::string &volume_name_pattern,
+                                              Vldb_entry_list &vlist)
+{
+       afs::VldbListByAttributes attr = { .Mask = 0 };
+       struct sockaddr_in *sin;
+       unsigned int i;
+
+       if (fsspec.specified) {
+               if (!fsspec.nr_addrs)
+                       throw std::invalid_argument("Unspecified fileserver address");
+               for (i = 0; i < fsspec.nr_addrs; i++) {
+                       sin = &fsspec.addrs[i].transport.sin;
+                       if (sin->sin_family == AF_INET)
+                               break;
+               }
+               if (i > fsspec.nr_addrs)
+                       throw std::invalid_argument("Filter only supports an IPv4 address");
+               attr.Mask |= afs::VLLIST_SERVER;
+
+               /* The XDR encoder will pass attr.server through htonl() */
+               attr.server = ntohl(sin->sin_addr.s_addr);
+       }
+
+       if (partition.specified) {
+               attr.Mask |= afs::VLLIST_PARTITION;
+               attr.partition = partition.id;
+       }
+       if (locked) {
+               attr.Mask |= afs::VLLIST_FLAG;
+               attr.flag = afs::VLOP_MOVE | afs::VLOP_RELEASE | afs::VLOP_BACKUP |
+                       afs::VLOP_DELETE | afs::VLOP_DUMP;
+       }
+
+       VL_ListAttributes(attr, volume_name_pattern, vlist);
+
+       for (i = 0; i < vlist.size(); i++)
+               map_volume_sites(vlist[i]);
+}
diff --git a/kafs/vlservice.H b/kafs/vlservice.H
new file mode 100644 (file)
index 0000000..6e294ca
--- /dev/null
@@ -0,0 +1,102 @@
+/* KAFS VL service mid-level client library.
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef VLSERVICE_H
+#define VLSERVICE_H
+
+#include "rxrpc.H"
+#include "kafs.H"
+#include "afs_xg.H"
+
+namespace kafs {
+
+namespace afs {
+class afsUUID;
+}
+
+class Vldb_site {
+public:
+       rxrpc::ref<FS_site>     fs_site;
+       rxrpc::Uuid             uuid;
+       unsigned int            unique;
+       unsigned int            flags;
+       unsigned int            nr_addrs;
+       unsigned int            addr_number;
+       bool                    has_uuid:1;
+       bool                    has_unique:1;
+       bool                    has_addr_number:1;
+       Partition_spec          partition;
+
+       Vldb_site();
+};
+
+class Vldb_entry : public rxrpc::refcount {
+public:
+       std::string             name;
+       Volume_id               volume_id[3];
+       unsigned int            clone_id;
+       unsigned int            flags;
+       unsigned int            match_index;
+       bool                    has_match_index:1;
+       std::vector<Vldb_site>  sites;
+
+       Vldb_entry(size_t nr_sites);
+};
+
+typedef std::vector<rxrpc::ref<Vldb_entry>> Vldb_entry_list;
+
+extern void afsUUID_to_uuid(rxrpc::Uuid &uuid, const afs::afsUUID *au);
+extern void uuid_to_afsUUID(afs::afsUUID *au, const rxrpc::Uuid &uuid);
+
+/*
+ * Volume location service.
+ */
+class VL_service : public rxrpc::refcount {
+public:
+       rxrpc::ref<Context>     vl_context;
+       struct kafs_server      *vl_server;     /* Preferred VL server */
+       unsigned int            vl_addr;        /* Preferred address on vl_server */
+       Service                 vl_service;     /* Service provided on that server */
+       unsigned int            vl_caps[1];     /* VL server capabilities */
+       rxrpc::Call_params      vl_params;      /* Parameters for accessing pref VL server */
+
+       /* vl_fileservers.C */
+       rxrpc::ref<FS_site> VL_GetAddrs_by_uuid(const rxrpc::Uuid &uuid);
+       rxrpc::ref<FS_site> VL_GetAddrs_by_addr(const struct sockaddr_rxrpc &srx);
+       rxrpc::ref<FS_site> VL_GetAddrs_by_index(int server_index);
+       rxrpc::ref<FS_site> look_up_fileserver_by_uuid(const rxrpc::Uuid &uuid);
+       rxrpc::ref<FS_site> look_up_fileserver_by_addr(const struct sockaddr_rxrpc &addr);
+       rxrpc::ref<FS_site> look_up_fileserver_by_index(int server_index);
+       void map_volume_sites(Vldb_entry *vldb);
+
+       /* vl_probe.C */
+       VL_service(Context *);
+       ~VL_service();
+       void probe_vl_service();
+
+       /* vl_volumes.C */
+       void VL_GetEntryByName(const std::string &name, Vldb_entry_list &vlist);
+       void VL_ListAttributes(afs::VldbListByAttributes &attributes,
+                              const std::string &volume_name_pattern,
+                              Vldb_entry_list &vlist);
+       void look_up_volume_by_name(Volume_spec &name,
+                                   Vldb_entry_list &volumes);
+
+       void look_up_volumes_by_attributes(Fileserver_spec &fileserver,
+                                          Partition_spec &partition,
+                                          bool locked,
+                                          const std::string &volume_name_pattern,
+                                          Vldb_entry_list &volumes);
+};
+
+} /* end namespace kafs */
+
+#endif /* VLSERVICE_H */