Server

When we install Linkerd, a destination controller is deployed in the destination pods. This controller is responsible for service discovery and routing. It watches Kubernetes resources (Services, EndpointSlices, Pods, ExternalWorkloads, etc.) via shared informers, builds a local cache of endpoints, and serves gRPC requests from data-plane proxies. It serves the requests coming from the proxies via gRPC on port 8086, and exposes its metrics on port 9996.

kubectl get pod -n linkerd linkerd-destination-86f8d8498b-sbtdl -o yaml
apiVersion: v1
kind: Pod
metadata:
  name: linkerd-destination-86f8d8498b-sbtdl
  namespace: linkerd
spec:
  automountServiceAccountToken: false
  containers:
  - args:
    - destination
    - -addr=:8086
    - -controller-namespace=linkerd
    - -enable-h2-upgrade=true
    - -log-level=debug
    - -log-format=plain
    - -enable-endpoint-slices=true
    - -cluster-domain=cluster.local
    - -identity-trust-domain=cluster.local
    - -default-opaque-ports=25,587,3306,4444,5432,6379,9300,11211
    - -enable-ipv6=false
    - -enable-pprof=false
    - -ext-endpoint-zone-weights
    image: ghcr.io/buoyantio/controller:enterprise-2.17.1
    name: destination
    ports:
    - containerPort: 8086
      name: grpc
      protocol: TCP
    - containerPort: 9996
      name: admin-http
      protocol: TCP
	...
    volumeMounts:
    - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
      name: kube-api-access
      readOnly: true

When the container starts, it will:

  • Start a new server to export the metrics via HTTP with adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)
  • Create a Kubernetes API client with k8Client, err := pkgK8s.NewAPI(*kubeConfigPath, "", "", []string{}, 0), which is later initialized differently depending on the enableEndpointSlices parameter.
  • Create the cluster store and start its watcher via clusterStore, err := watcher.NewClusterStore(k8Client, *controllerNamespace, *enableEndpointSlices)
  • Validate the parameters passed to the container and use them to initialize a new gRPC server with destination.NewServer(*addr, config, k8sAPI, metadataAPI, clusterStore, done)
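To make the shape of that startup concrete, here is a minimal, self-contained sketch of the same pattern (illustrative only; the real main() also wires in the Kubernetes clients, the metadata API, and the cluster store listed above): an admin HTTP server exposing /metrics on port 9996 and a gRPC server listening on port 8086, both stopped together on SIGINT/SIGTERM.

package main

import (
	"log"
	"net"
	"net/http"
	"os"
	"os/signal"
	"syscall"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	"google.golang.org/grpc"
)

func main() {
	// Admin server: serves /metrics the same way the controller's admin-http
	// port (9996) does.
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())
	adminSrv := &http.Server{Addr: ":9996", Handler: mux}
	go func() {
		if err := adminSrv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			log.Fatalf("admin server: %v", err)
		}
	}()

	// gRPC server: in the real controller this is the server returned by
	// destination.NewServer(...), listening on the grpc port (8086).
	lis, err := net.Listen("tcp", ":8086")
	if err != nil {
		log.Fatalf("listen: %v", err)
	}
	grpcSrv := grpc.NewServer()
	go func() {
		if err := grpcSrv.Serve(lis); err != nil {
			log.Fatalf("grpc serve: %v", err)
		}
	}()

	// Block until SIGINT/SIGTERM, then stop both servers.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, syscall.SIGINT, syscall.SIGTERM)
	<-stop
	grpcSrv.GracefulStop()
	adminSrv.Close()
}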

Interactions with the Kubernetes API

When the K8s API client starts, it uses the k8s.io/client-go Go module to build one shared informer for each resource kind it cares about (CronJobs, Pods, Services, etc.) and stores the handle in the API struct. For each informer it also records a HasSynced check and registers a Prometheus gauge that reports the current key count of that informer’s cache.

func newAPI(
	k8sClient kubernetes.Interface,
	dynamicClient dynamic.Interface,
	l5dCrdClient l5dcrdclient.Interface,
	sharedInformers informers.SharedInformerFactory,
	cluster string,
	resources ...APIResource,
) *API {
	var l5dCrdSharedInformers l5dcrdinformer.SharedInformerFactory
	if l5dCrdClient != nil {
		l5dCrdSharedInformers = l5dcrdinformer.NewSharedInformerFactory(l5dCrdClient, ResyncTime)
	}
	api := &API{
		Client:                k8sClient,
		L5dClient:             l5dCrdClient,
		DynamicClient:         dynamicClient,
		syncChecks:            make([]cache.InformerSynced, 0),
		sharedInformers:       sharedInformers,
		l5dCrdSharedInformers: l5dCrdSharedInformers,
	}
	informerLabels := prometheus.Labels{
		"cluster": cluster,
	}
	for _, resource := range resources {
		switch resource {
		case CJ:
			api.cj = sharedInformers.Batch().V1().CronJobs()
			api.syncChecks = append(api.syncChecks, api.cj.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.CronJob, informerLabels, api.cj.Informer())
		case CM:
			api.cm = sharedInformers.Core().V1().ConfigMaps()
			api.syncChecks = append(api.syncChecks, api.cm.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.ConfigMap, informerLabels, api.cm.Informer())
		case Deploy:
			api.deploy = sharedInformers.Apps().V1().Deployments()
			api.syncChecks = append(api.syncChecks, api.deploy.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Deployment, informerLabels, api.deploy.Informer())
		case DS:
			api.ds = sharedInformers.Apps().V1().DaemonSets()
			api.syncChecks = append(api.syncChecks, api.ds.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.DaemonSet, informerLabels, api.ds.Informer())
		case Endpoint:
			api.endpoint = sharedInformers.Core().V1().Endpoints()
			api.syncChecks = append(api.syncChecks, api.endpoint.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Endpoints, informerLabels, api.endpoint.Informer())
		case ES:
			api.es = sharedInformers.Discovery().V1().EndpointSlices()
			api.syncChecks = append(api.syncChecks, api.es.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.EndpointSlices, informerLabels, api.es.Informer())
		case ExtWorkload:
			if l5dCrdSharedInformers == nil {
				panic("Linkerd CRD shared informer not configured")
			}
			api.ew = l5dCrdSharedInformers.Externalworkload().V1beta1().ExternalWorkloads()
			api.syncChecks = append(api.syncChecks, api.ew.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.ExtWorkload, informerLabels, api.ew.Informer())
		case Job:
			api.job = sharedInformers.Batch().V1().Jobs()
			api.syncChecks = append(api.syncChecks, api.job.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Job, informerLabels, api.job.Informer())
		case Link:
			if l5dCrdSharedInformers == nil {
				panic("Linkerd CRD shared informer not configured")
			}
			api.link = l5dCrdSharedInformers.Link().V1alpha3().Links()
			api.syncChecks = append(api.syncChecks, api.link.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Link, informerLabels, api.link.Informer())
		case MWC:
			api.mwc = sharedInformers.Admissionregistration().V1().MutatingWebhookConfigurations()
			api.syncChecks = append(api.syncChecks, api.mwc.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.MutatingWebhookConfig, informerLabels, api.mwc.Informer())
		case NS:
			api.ns = sharedInformers.Core().V1().Namespaces()
			api.syncChecks = append(api.syncChecks, api.ns.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Namespace, informerLabels, api.ns.Informer())
		case Pod:
			api.pod = sharedInformers.Core().V1().Pods()
			api.syncChecks = append(api.syncChecks, api.pod.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Pod, informerLabels, api.pod.Informer())
		case RC:
			api.rc = sharedInformers.Core().V1().ReplicationControllers()
			api.syncChecks = append(api.syncChecks, api.rc.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.ReplicationController, informerLabels, api.rc.Informer())
		case RS:
			api.rs = sharedInformers.Apps().V1().ReplicaSets()
			api.syncChecks = append(api.syncChecks, api.rs.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.ReplicaSet, informerLabels, api.rs.Informer())
		case SP:
			if l5dCrdSharedInformers == nil {
				panic("Linkerd CRD shared informer not configured")
			}
			api.sp = l5dCrdSharedInformers.Linkerd().V1alpha2().ServiceProfiles()
			api.syncChecks = append(api.syncChecks, api.sp.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.ServiceProfile, informerLabels, api.sp.Informer())
		case Srv:
			if l5dCrdSharedInformers == nil {
				panic("Linkerd CRD shared informer not configured")
			}
			api.srv = l5dCrdSharedInformers.Server().V1beta3().Servers()
			api.syncChecks = append(api.syncChecks, api.srv.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Server, informerLabels, api.srv.Informer())
		case SS:
			api.ss = sharedInformers.Apps().V1().StatefulSets()
			api.syncChecks = append(api.syncChecks, api.ss.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.StatefulSet, informerLabels, api.ss.Informer())
		case Svc:
			api.svc = sharedInformers.Core().V1().Services()
			api.syncChecks = append(api.syncChecks, api.svc.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Service, informerLabels, api.svc.Informer())
		case Node:
			api.node = sharedInformers.Core().V1().Nodes()
			api.syncChecks = append(api.syncChecks, api.node.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Node, informerLabels, api.node.Informer())
		case Secret:
			api.secret = sharedInformers.Core().V1().Secrets()
			api.syncChecks = append(api.syncChecks, api.secret.Informer().HasSynced)
			api.promGauges.addInformerSize(k8s.Secret, informerLabels, api.secret.Informer())
		}
	}
	return api
}

When the Sync function is called, each informer requests an initial snapshot and then opens a long-lived watch stream (the watch=true parameter visible in the requests below) so it can receive change events as they happen. Every 10 minutes (defined by the constant ResyncTime = 10 * time.Minute) the informers also resync, re-delivering the complete set of cached objects to their handlers.
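The same list-then-watch behaviour can be reproduced with a few lines of plain client-go. The sketch below is not Linkerd code; it simply builds a shared informer factory with the same 10-minute resync period, starts it, and waits for the initial Service snapshot to land in the cache.

package main

import (
	"time"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
)

// resyncTime mirrors Linkerd's ResyncTime constant.
const resyncTime = 10 * time.Minute

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	factory := informers.NewSharedInformerFactory(client, resyncTime)
	svcInformer := factory.Core().V1().Services().Informer()

	stop := make(chan struct{})
	defer close(stop)
	factory.Start(stop)                                 // LIST once, then keep a WATCH open
	cache.WaitForCacheSync(stop, svcInformer.HasSynced) // block until the initial snapshot is cached

	// From here on, the local cache is kept up to date by the watch stream.
	select {}
}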

kubectl logs -n linkerd deploy/linkerd-destination -c destination --follow
...
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/workload.linkerd.io/v1beta1/externalworkloads?allowWatchBookmarks=true&resourceVersion=740&timeout=7m47s&timeoutSeconds=467&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/discovery.k8s.io/v1/endpointslices?allowWatchBookmarks=true&resourceVersion=751&timeout=9m16s&timeoutSeconds=556&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/batch/v1/jobs?allowWatchBookmarks=true&resourceVersion=740&timeout=9m10s&timeoutSeconds=550&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/api/v1/endpoints?allowWatchBookmarks=true&resourceVersion=740&timeout=9m35s&timeoutSeconds=575&watch=true 200 OK in 1 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/linkerd.io/v1alpha2/serviceprofiles?allowWatchBookmarks=true&resourceVersion=740&timeout=7m19s&timeoutSeconds=439&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/policy.linkerd.io/v1beta3/servers?allowWatchBookmarks=true&resourceVersion=740&timeout=8m16s&timeoutSeconds=496&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/api/v1/services?allowWatchBookmarks=true&resourceVersion=740&timeout=8m50s&timeoutSeconds=530&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/api/v1/pods?allowWatchBookmarks=true&resourceVersion=741&timeout=8m55s&timeoutSeconds=535&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/apps/v1/replicasets?allowWatchBookmarks=true&resourceVersion=739&timeout=7m4s&timeoutSeconds=424&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/apis/batch/v1/jobs?allowWatchBookmarks=true&resourceVersion=740&timeout=7m29s&timeoutSeconds=449&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/api/v1/nodes?allowWatchBookmarks=true&resourceVersion=740&timeout=8m30s&timeoutSeconds=510&watch=true 200 OK in 0 milliseconds"
time="2025-05-19T09:10:15Z" level=info msg="GET https://10.247.0.1:443/api/v1/namespaces/linkerd/secrets?allowWatchBookmarks=true&resourceVersion=740&timeout=9m7s&timeoutSeconds=547&watch=true 200 OK in 0 milliseconds"

Each informer owns a thread-safe local cache where it stores the data returned by the Kubernetes API server.
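Reads go through the informer’s lister, exactly as the destination server later does with s.k8sAPI.Svc().Lister(): the lookup is answered from memory, without touching the API server. A small sketch, reusing the factory from the earlier client-go snippet:

import (
	"log"

	kerrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/client-go/informers"
)

func lookupService(factory informers.SharedInformerFactory, ns, name string) {
	// Served entirely from the informer's in-memory store.
	svc, err := factory.Core().V1().Services().Lister().Services(ns).Get(name)
	if err != nil {
		if kerrors.IsNotFound(err) {
			log.Printf("%s/%s is not in the cache", ns, name)
			return
		}
		log.Printf("lookup failed: %v", err)
		return
	}
	log.Printf("cached ClusterIP for %s/%s: %s", ns, name, svc.Spec.ClusterIP)
}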

By itself, an informer does not actually notify your business logic when things change; it only populates a local cache and lets you query it. For this reason the controller’s source code builds several watchers on top of the relevant informers; each watcher registers event handlers on those informers so it actually gets notified when the cache is updated (see the sketch after this list). At the time of this writing there are five main watchers:

  • Endpoints Watcher
  • Profile Watcher
  • Workload Watcher
  • Opaque Ports Watcher
  • Federated Service Watcher
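In spirit, each of these watchers does something like the following sketch (illustrative, not the Linkerd source): it registers event handlers on a shared informer so that its own logic runs whenever the cache changes.

import (
	"log"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/cache"
)

func registerServiceHandlers(factory informers.SharedInformerFactory) {
	factory.Core().V1().Services().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			svc := obj.(*corev1.Service)
			log.Printf("service added: %s/%s", svc.Namespace, svc.Name)
		},
		UpdateFunc: func(_, newObj interface{}) {
			svc := newObj.(*corev1.Service)
			// A real watcher would diff old vs. new here and notify its subscribers.
			log.Printf("service updated: %s/%s", svc.Namespace, svc.Name)
		},
		DeleteFunc: func(obj interface{}) {
			// Deletes may arrive wrapped in a cache.DeletedFinalStateUnknown tombstone.
			log.Printf("service deleted: %v", obj)
		},
	})
}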

Destination Server

The server.go file is the “glue” that ties all of the individual watchers and translators together into a single gRPC server that speaks the Destination API to the data-plane proxies. Its constructor ensures that all of the informers are set up so that each watcher can do fast lookups.

func NewServer(
	addr string,
	config Config,
	k8sAPI *k8s.API,
	metadataAPI *k8s.MetadataAPI,
	clusterStore *watcher.ClusterStore,
	shutdown <-chan struct{},
) (*grpc.Server, error) {
	log := logging.WithFields(logging.Fields{
		"addr":      addr,
		"component": "server",
	})
	err := watcher.InitializeIndexers(k8sAPI)
	if err != nil {
		return nil, err
	}
	workloads, err := watcher.NewWorkloadWatcher(k8sAPI, metadataAPI, log, config.EnableEndpointSlices, config.DefaultOpaquePorts)
	if err != nil {
		return nil, err
	}
	endpoints, err := watcher.NewEndpointsWatcher(k8sAPI, metadataAPI, log, config.EnableEndpointSlices, "local")
	if err != nil {
		return nil, err
	}
	opaquePorts, err := watcher.NewOpaquePortsWatcher(k8sAPI, log, config.DefaultOpaquePorts)
	if err != nil {
		return nil, err
	}
	profiles, err := watcher.NewProfileWatcher(k8sAPI, log)
	if err != nil {
		return nil, err
	}
	federatedServices, err := newFederatedServiceWatcher(k8sAPI, metadataAPI, &config, clusterStore, endpoints, log)
	if err != nil {
		return nil, err
	}
	srv := server{
		pb.UnimplementedDestinationServer{},
		config,
		workloads,
		endpoints,
		opaquePorts,
		profiles,
		clusterStore,
		federatedServices,
		k8sAPI,
		metadataAPI,
		log,
		shutdown,
	}
	s := prometheus.NewGrpcServer(grpc.MaxConcurrentStreams(0))
	pb.RegisterDestinationServer(s, &srv)
	return s, nil
}

It will then expose two endpoints: Get and GetProfile.

Get Endpoint

Get is the “address-discovery” endpoint of the Destination API. A proxy calls it when it needs a live list of socket addresses (plus TLS and protocol hints) for some <host>:<port> it is about to dial.

First the server decorates its logger with the caller’s TCP address (handy when dozens of sidecars are connected) and tries to parse the context token the proxy may have sent. That token, if present, looks like {"ns":"shop-front","nodeName":"worker-2"}; decoding it now means later code can choose endpoints that live in the same topology zone as the caller, or apply tenant-scoped policy overrides.
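A hedged sketch of that token handling, assuming only the JSON shape shown above (the real parseContextToken in server.go is more tolerant of malformed tokens):

import (
	"encoding/json"
	"log"
)

// contextToken mirrors the JSON blob a proxy attaches to its requests, e.g.
// {"ns":"shop-front","nodeName":"worker-2","pod":"shop-front-abc"}.
type contextToken struct {
	Ns       string `json:"ns,omitempty"`
	NodeName string `json:"nodeName,omitempty"`
	Pod      string `json:"pod,omitempty"`
}

// parseContextToken is illustrative only: it decodes the token and falls back
// to an empty token when the blob cannot be parsed.
func parseContextToken(raw string) contextToken {
	var tok contextToken
	if err := json.Unmarshal([]byte(raw), &tok); err != nil {
		log.Printf("unparsable context token %q: %v", raw, err)
		return contextToken{}
	}
	return tok
}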

Next it ensures that the incoming destination.Path contains an authority (the <host>:<port> bit). If the authority is missing, the call is rejected immediately with InvalidArgument, because without a host there is nothing to resolve. When the authority is there, it is split into host and port via getHostAndPort. The port must fall inside the legal TCP range (1 through 65535); otherwise the server again returns InvalidArgument.

Next, it uses the parseK8sServiceName function to split the FQDN and, based on the length of the resulting array, does one of two things:

  • Service-only: <svc-name>.<ns>.svc.<cluster-domain>. It returns the service obtained from the service watcher. Feed it web.default.svc.cluster.local and you get back (ServiceID{Name:“web”, Namespace:“default”}, instanceID="")
  • Hostname + service: <pod-hostname>.<svc-name>.<ns>.svc.<cluster-domain>. It returns both the service obtained from the service watcher and an instanceID (the pod’s name), so updates can be scoped to that specific pod if needed. Feed it web-0.web.default.svc.cluster.local and you get (ServiceID{Name:“web”, Namespace:“default”}, “web-0”).

That instanceID tells the controller whether the proxy is aiming at the whole Service or at one specific pod.
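A compact, illustrative re-implementation of that splitting logic, assuming only the two shapes described above (the real parseK8sServiceName handles a few more edge cases):

import (
	"fmt"
	"strings"
)

type ServiceID struct {
	Namespace string
	Name      string
}

// parseServiceFQDN is an illustrative stand-in for parseK8sServiceName.
func parseServiceFQDN(host, clusterDomain string) (ServiceID, string, error) {
	suffix := ".svc." + clusterDomain
	if !strings.HasSuffix(host, suffix) {
		return ServiceID{}, "", fmt.Errorf("not a Kubernetes service name: %s", host)
	}
	labels := strings.Split(strings.TrimSuffix(host, suffix), ".")
	switch len(labels) {
	case 2: // <svc>.<ns>.svc.<cluster-domain>
		return ServiceID{Namespace: labels[1], Name: labels[0]}, "", nil
	case 3: // <hostname>.<svc>.<ns>.svc.<cluster-domain>
		return ServiceID{Namespace: labels[2], Name: labels[1]}, labels[0], nil
	default:
		return ServiceID{}, "", fmt.Errorf("invalid service FQDN: %s", host)
	}
}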

Armed with the ServiceID, the server does a cached lookup via the shared informer lister. A missing Service produces NotFound. A present Service is then examined to decide what kind it is:

  • Federated service
  • Remote-discovery service
  • Local service

Federated Service

If the Service has either the multicluster.linkerd.io/local-discovery or multicluster.linkerd.io/remote-discovery annotation, it processes it as a federated service. The value of multicluster.linkerd.io/remote-discovery is a comma-separated list such as <svcA>@clusterA,<svcB>@clusterB. A helper called remoteDiscoveryIDs breaks that string on commas, then on @, producing a slice of structs that pair the remote service name with the cluster name. Invalid fragments are logged and skipped.
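A sketch of that annotation parsing, following the same <svc>@<cluster> convention (the type and function names here are illustrative, not the ones used in the destination package):

import "strings"

// remoteTarget pairs a remote service name with the linked cluster it lives in.
type remoteTarget struct {
	Service string
	Cluster string
}

// parseRemoteDiscovery breaks apart the comma-separated
// "<svcA>@clusterA,<svcB>@clusterB" annotation value; malformed fragments are
// skipped, as described above.
func parseRemoteDiscovery(annotation string) []remoteTarget {
	var targets []remoteTarget
	for _, item := range strings.Split(annotation, ",") {
		parts := strings.SplitN(item, "@", 2)
		if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
			continue // invalid fragment: logged and skipped in the real code
		}
		targets = append(targets, remoteTarget{Service: parts[0], Cluster: parts[1]})
	}
	return targets
}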

Then the call is handed off to the federatedServiceWatcher, which looks up the in-memory federatedService object for the given <namespace>/<service>. This is a long-lived object that self-indexes all Services which currently carry one of those annotations. It maintains a map services[ServiceID] → *federatedService and keeps that map up to date with event-handler callbacks registered on the core Service informer.

A federatedService object in turn owns a slice of subscribers—each gRPC stream that is currently watching that Service—as well as:

  • localDiscovery: the name of the Service inside this cluster that should be merged in (can be empty).
  • remoteDiscovery[]: a parsed list of cluster-qualified remote targets, each of the form <svc>@<cluster-name>

For every <svc>@<cluster> item it asks the cluster store for two things:

  • a remote EndpointsWatcher that streams address updates coming from that foreign control-plane.
  • a ClusterConfig object that tells us the remote mesh’s trust-domain and DNS suffix.

It then builds a fully qualified authority of the form <remoteSvc>.<namespace>.svc.<remoteClusterDomain>:<port> so a pod in the local cluster will dial, for example, checkout-east.default.svc.east.example.net:50051; that string is passed to the translator constructor. Finally, it subscribes that translator to the remote EndpointsWatcher, keyed by the remote service’s ServiceID. The watcher pushes add, remove, and no-endpoints events; the translator turns them into Destination.Get protobuf updates, and those updates are streamed straight to the waiting proxy.

If the Service also carries multicluster.linkerd.io/local-discovery the string in that annotation is treated as the name of another Service that lives in this cluster and should be merged in alongside the remote clusters. A second translator is spun up—this one with endpoint filtering enabled—and subscribed to the ordinary local EndpointsWatcher.

Remote Discovery Services

If the service carries the label multicluster.linkerd.io/cluster-name=<cluster>, the controller looks up that cluster in its clusterStore, fetches a remote EndpointsWatcher and a bit of identity/DNS config, spins up one endpointTranslator, and subscribes it to the remote watcher. Endpoints appearing in the other cluster are turned into Update.Add messages; disappearing ones become Update.Remove.

Local Services

If neither “federated” nor “remote discovery” applies, it treats the Service as a normal local Kubernetes service. A translator is created with enableEndpointFiltering=true, which means it will later filter endpoints down to the caller’s topology zone if zone-aware hints exist. The translator is then subscribed to the local EndpointsWatcher.

The translator itself is an adapter that keeps an internal snapshot of “currently alive” addresses, watches for adds/removes/NoEndpoints events, and pushes the delta down the gRPC stream as pb.Update messages (Add {WeightedAddrSet} or Remove {AddrSet}). It also enriches each address with:

  • TLS identity (if the pod is meshed and in the same trust domain),
  • Protocol hints (H2 vs opaque transport),
  • Zone-locality labels,
  • and optional weight tweaks so a proxy prefers same-zone endpoints.

All three discovery branches create a streamEnd channel and hand it to the translator. If the proxy stops reading and the translator’s bounded queue overflows, it just closes that channel, increments a Prometheus counter, and lets Get tear the stream down; the proxy reconnects and picks up a fresh snapshot, keeping the controller safe from unbounded memory growth.
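A minimal sketch of that back-pressure valve, using a hypothetical boundedSender type: the send into the update queue never blocks, and on overflow the streamEnd channel is closed exactly once so the RPC can be torn down. The real translators also increment an overflow counter at that point.

import "sync"

// update stands in for a Destination.Get protobuf update.
type update struct{}

// boundedSender is a hypothetical illustration of the translators' queueing.
type boundedSender struct {
	updates   chan update   // bounded queue drained by the stream-writing goroutine
	streamEnd chan struct{} // closed on overflow; Get/GetProfile select on it
	endOnce   sync.Once
}

// enqueue never blocks: if the proxy has stopped reading and the queue is
// full, the stream is marked as ended so the caller tears it down and the
// proxy reconnects for a fresh snapshot.
func (b *boundedSender) enqueue(u update) {
	select {
	case b.updates <- u:
	default:
		// Overflow: the real code increments a *_updates_queue_overflow
		// counter here before closing the channel.
		b.endOnce.Do(func() { close(b.streamEnd) })
	}
}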

func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) error {
	log := s.log
	client, _ := peer.FromContext(stream.Context())
	if client != nil {
		log = log.WithField("remote", client.Addr)
	}
	var token contextToken
	if dest.GetContextToken() != "" {
		log.Debugf("Dest token: %q", dest.GetContextToken())
		token = s.parseContextToken(dest.GetContextToken())
		log = log.WithFields(logging.Fields{"context-pod": token.Pod, "context-ns": token.Ns})
	}
	log.Debugf("Get %s", dest.GetPath())
	streamEnd := make(chan struct{})
	host, port, err := getHostAndPort(dest.GetPath())
	if err != nil {
		log.Debugf("Invalid service %s", dest.GetPath())
		return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
	}
	if ip := net.ParseIP(host); ip != nil {
		return status.Errorf(codes.InvalidArgument, "IP queries not supported by Get API: host=%s", host)
	}
	service, instanceID, err := parseK8sServiceName(host, s.config.ClusterDomain)
	if err != nil {
		log.Debugf("Invalid service %s", dest.GetPath())
		return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
	}
	svc, err := s.k8sAPI.Svc().Lister().Services(service.Namespace).Get(service.Name)
	if err != nil {
		if kerrors.IsNotFound(err) {
			log.Debugf("Service not found %s", service)
			return status.Errorf(codes.NotFound, "Service %s.%s not found", service.Name, service.Namespace)
		}
		log.Debugf("Failed to get service %s: %v", service, err)
		return status.Errorf(codes.Internal, "Failed to get service %s", dest.GetPath())
	}
	if isFederatedService(svc) {
		remoteDiscovery := svc.Annotations[labels.RemoteDiscoveryAnnotation]
		localDiscovery := svc.Annotations[labels.LocalDiscoveryAnnotation]
		log.Debugf("Federated service discovery, remote:[%s] local:[%s]", remoteDiscovery, localDiscovery)
		err := s.federatedServices.Subscribe(svc.Name, svc.Namespace, port, token.NodeName, instanceID, stream, streamEnd)
		if err != nil {
			log.Errorf("Failed to subscribe to federated service %q: %s", dest.GetPath(), err)
			return err
		}
		defer s.federatedServices.Unsubscribe(svc.Name, svc.Namespace, stream)
	} else if cluster, found := svc.Labels[labels.RemoteDiscoveryLabel]; found {
		log.Debug("Remote discovery service detected")
		remoteSvc, found := svc.Labels[labels.RemoteServiceLabel]
		if !found {
			log.Debugf("Remote discovery service missing remote service name %s", service)
			return status.Errorf(codes.FailedPrecondition, "Remote discovery service missing remote service name %s", dest.GetPath())
		}
		remoteWatcher, remoteConfig, found := s.clusterStore.Get(cluster)
		if !found {
			log.Errorf("Failed to get remote cluster %s", cluster)
			return status.Errorf(codes.NotFound, "Remote cluster not found: %s", cluster)
		}
		translator := newEndpointTranslator(
			s.config.ControllerNS,
			remoteConfig.TrustDomain,
			s.config.ForceOpaqueTransport,
			s.config.EnableH2Upgrade,
			false, // Disable endpoint filtering for remote discovery.
			s.config.EnableIPv6,
			s.config.ExtEndpointZoneWeights,
			s.config.MeshedHttp2ClientParams,
			fmt.Sprintf("%s.%s.svc.%s:%d", remoteSvc, service.Namespace, remoteConfig.ClusterDomain, port),
			token.NodeName,
			s.config.DefaultOpaquePorts,
			s.metadataAPI,
			stream,
			streamEnd,
			log,
		)
		translator.Start()
		defer translator.Stop()
		err = remoteWatcher.Subscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, instanceID, translator)
		if err != nil {
			var ise watcher.InvalidService
			if errors.As(err, &ise) {
				log.Debugf("Invalid remote discovery service %s", dest.GetPath())
				return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
			}
			log.Errorf("Failed to subscribe to remote discovery service %q in cluster %s: %s", dest.GetPath(), cluster, err)
			return err
		}
		defer remoteWatcher.Unsubscribe(watcher.ServiceID{Namespace: service.Namespace, Name: remoteSvc}, port, instanceID, translator)
	} else {
		log.Debug("Local discovery service detected")
		translator := newEndpointTranslator(
			s.config.ControllerNS,
			s.config.IdentityTrustDomain,
			s.config.ForceOpaqueTransport,
			s.config.EnableH2Upgrade,
			true,
			s.config.EnableIPv6,
			s.config.ExtEndpointZoneWeights,
			s.config.MeshedHttp2ClientParams,
			dest.GetPath(),
			token.NodeName,
			s.config.DefaultOpaquePorts,
			s.metadataAPI,
			stream,
			streamEnd,
			log,
		)
		translator.Start()
		defer translator.Stop()
		err = s.endpoints.Subscribe(service, port, instanceID, translator)
		if err != nil {
			var ise watcher.InvalidService
			if errors.As(err, &ise) {
				log.Debugf("Invalid service %s", dest.GetPath())
				return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
			}
			log.Errorf("Failed to subscribe to %s: %s", dest.GetPath(), err)
			return err
		}
		defer s.endpoints.Unsubscribe(service, port, instanceID, translator)
	}
	select {
	case <-s.shutdown:
	case <-stream.Context().Done():
		log.Debugf("Get %s cancelled", dest.GetPath())
	case <-streamEnd:
		log.Errorf("Get %s stream aborted", dest.GetPath())
	}
	return nil
}

If we inspect the logs we will be able to see references to these behaviors. The following is an example for a local service.

time="2025-06-06T06:11:39Z" level=debug msg="Get simple-app-v1.simple-app.svc.cluster.local:80" addr=":8086" component=server context-ns=simple-app context-pod=traffic-5cf984699d-rvcrz remote="10.23.0.30:45616"
time="2025-06-06T06:11:39Z" level=debug msg="Local discovery service detected" addr=":8086" component=server context-ns=simple-app context-pod=traffic-5cf984699d-rvcrz remote="10.23.0.30:45616"
time="2025-06-06T06:11:39Z" level=debug msg="Hints not available on endpointslice. Zone Filtering disabled. Falling back to routing to all pods" addr=":8086" component=endpoint-translator context-ns=simple-app context-pod=traffic-5cf984699d-rvcrz remote="10.23.0.30:45616" service="simple-app-v1.simple-app.svc.cluster.local:80"
time="2025-06-06T06:11:39Z" level=debug msg="Sending destination add: add:{addrs:{addr:{ip:{ipv4:169279523} port:5678} weight:10000 metric_labels:{key:\"control_plane_ns\" value:\"linkerd\"} metric_labels:{key:\"deployment\" value:\"simple-app-v1\"} metric_labels:{key:\"pod\" value:\"simple-app-v1-57b57f8947-b6bpd\"} metric_labels:{key:\"pod_template_hash\" value:\"57b57f8947\"} metric_labels:{key:\"serviceaccount\" value:\"default\"} metric_labels:{key:\"zone\" value:\"\"} metric_labels:{key:\"zone_locality\" value:\"unknown\"} tls_identity:{dns_like_identity:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"} server_name:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"}} protocol_hint:{h2:{}}} metric_labels:{key:\"namespace\" value:\"simple-app\"} metric_labels:{key:\"service\" value:\"simple-app-v1\"}}" addr=":8086" component=endpoint-translator context-ns=simple-app context-pod=traffic-5cf984699d-rvcrz remote="10.23.0.30:45616" service="simple-app-v1.simple-app.svc.cluster.local:80"

GetProfile Endpoint

When a proxy opens a stream via this endpoint, the server first tries to extract a context token. That token is just a tiny JSON blob like {"ns":"foo","pod":"foo-abcd","nodeName":"worker-3"} and, if present, it is decoded and stashed on the request’s context so that later code can recognise which workload is asking.

Next it ensures that the incoming destination.Path contains an authority (the <host>:<port> bit). If the authority is missing, the call is rejected immediately with InvalidArgument, because without a host there is nothing to profile. When the authority is there, it is split into host and port via getHostAndPort. The port must fall inside the legal TCP range (1 through 65535); otherwise the server again returns InvalidArgument.

At this point, depending on whether the host parses as an IP address (meaning we are dealing with a ClusterIP or a naked pod IP) or as an FQDN, the request is processed differently.

IP Address

When the host is an IP the helper getProfileByIP is called. The first thing that function does is look up whether that IP is owned by a Kubernetes Service (getSvcID). Suppose the address was 10.96.0.10; if the informer knows that IP belongs to web.default.svc.cluster.local, the controller treats the request as a service-level query and moves on to subscribeToServiceProfile. If, on the other hand, the IP does not map to any Service—maybe it is a direct pod address like 10.1.2.17 used for debugging—then the controller treats it as a single-endpoint query and jumps to subscribeToEndpointProfile.

FQDN

When the host is a DNS name the helper getProfileByName breaks the Kubernetes service hostname into its constituent components with parseK8sServiceName. A fully-qualified Service such as web.default.svc.cluster.local yields a serviceID and an empty instanceID; a pod DNS name like web-b4cd56d7c-0.web.default.svc.cluster.local yields both a serviceID and a non-empty instanceID. If the host is not in one of the recognised Kubernetes forms the call is rejected as InvalidArgument. For hosts that are recognised, the decision is again simple: a non-empty instanceID means “this is one pod, give me its per-endpoint policy”, so the controller calls subscribeToEndpointProfile; an empty instanceID means “this is the Service itself”, so it calls subscribeToServiceProfile.

As you can see, both approaches lead to the subscribeToServiceProfile and subscribeToEndpointProfile functions. Let’s take a look at what they do.

subscribeToServiceProfile

This function assembles the full policy pipeline. First it creates a profileTranslator, handing it the service’s FQN, port, a reference to the open gRPC stream, and a logger that now carries component=profile-translator. Then it decides whether one watch or two are needed. If the proxy did not send a context token the server assumes a single, global point of truth and subscribes once to ServiceProfile objects living in the service’s own namespace (subscribeToServiceWithoutContext).

If the proxy did send a token (imagine a workload in namespace tenant-a calling web.default), the controller starts two simultaneous watches (subscribeToServicesWithContext):

  • the “primary” watch looks for a ServiceProfile named web.default.svc.cluster.local inside tenant-a,
  • a “backup” watch looks for the same name in the default control namespace. A little fallbackProfileListener forwards whichever stream produces data first and switches live if one of them goes away, giving tenant overrides precedence without ever leaving the proxy starved of policy (a sketch follows this list).
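A very rough sketch of that fallback behaviour, using a placeholder profile type instead of the real ServiceProfile machinery: whichever source has data is published, with the primary (tenant-namespace) profile taking precedence whenever it exists. The real fallbackProfileListener also waits for both sources to report at least once before publishing, which is why the “Waiting for primary profile listener to be initialized” log line appears later in this post.

import "sync"

// profile stands in for a ServiceProfile update; nil means "no profile found".
type profile struct{ Name string }

type fallbackListener struct {
	mu              sync.Mutex
	primary, backup *profile
	publish         func(*profile) // pushes the chosen profile down the gRPC stream
}

func (f *fallbackListener) updatePrimary(p *profile) { f.update(&f.primary, p) }
func (f *fallbackListener) updateBackup(p *profile)  { f.update(&f.backup, p) }

func (f *fallbackListener) update(slot **profile, p *profile) {
	f.mu.Lock()
	defer f.mu.Unlock()
	*slot = p
	// The tenant override wins; otherwise fall back to the control-plane default.
	if f.primary != nil {
		f.publish(f.primary)
	} else {
		f.publish(f.backup)
	}
}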

subscribeToEndpointProfile

This function is lighter. Instead of watching ServiceProfile objects it subscribes the translator to the WorkloadWatcher keyed by the exact ip+port (and the service ID if there is one). The resulting events carry labels, identity information and opaque-port hints for that single pod, but none of the higher-level traffic-split or retry rules that come from a ServiceProfile CRD. It is the path you hit when a proxy opens GetProfile("10.1.2.3:8080") or GetProfile("pod-0.web.default.svc.cluster.local:8080").

Finally, note that the profileTranslator receives every ServiceProfile (or endpoint update) pushed by the watcher, queues it, converts it into a protobuf DestinationProfile, and writes it down the gRPC stream. If the proxy falls behind and the update queue fills, the translator blows a fuse: it increments the profile_updates_queue_overflow metric, closes a side-channel (endStream), and the whole GetProfile RPC is torn down so the proxy can reconnect and catch up with a fresh snapshot. That back-pressure valve protects the controller from ever exploding its heap under load.

So, in everyday terms: if a proxy in namespace simple-app asks for GetProfile("web.default.svc.cluster.local:80"), the controller figures out that “web.default” really is a Service, starts one watch for web.default in simple-app (in case the tenant admin has installed a custom policy) and another in the control plane namespace for the cluster-wide default. Events from those watches flow through a translator that turns YAML into protobuf and drip them back to the proxy. If instead the proxy asks for GetProfile("10.1.2.17:9090"), the controller notices that the address is just a pod, wires a WorkloadWatcher for that single endpoint, and returns only per-pod metadata—no fancy traffic splits, but still the identity and the set of “opaque” ports the proxy must forward blindly.

func (s *server) GetProfile(dest *pb.GetDestination, stream pb.Destination_GetProfileServer) error {
	log := s.log
	client, _ := peer.FromContext(stream.Context())
	if client != nil {
		log = log.WithField("remote", client.Addr)
	}
	var token contextToken
	if dest.GetContextToken() != "" {
		log.Debugf("Dest token: %q", dest.GetContextToken())
		token = s.parseContextToken(dest.GetContextToken())
		log = log.WithFields(logging.Fields{"context-pod": token.Pod, "context-ns": token.Ns})
	}
	log.Debugf("Getting profile for %s", dest.GetPath())
	host, port, err := getHostAndPort(dest.GetPath())
	if err != nil {
		log.Debugf("Invalid address %q", dest.GetPath())
		return status.Errorf(codes.InvalidArgument, "invalid authority: %q: %q", dest.GetPath(), err)
	}
	if ip := net.ParseIP(host); ip != nil {
		err = s.getProfileByIP(token, ip, port, log, stream)
		if err != nil {
			var ise watcher.InvalidService
			if errors.As(err, &ise) {
				log.Debugf("Invalid service %s", dest.GetPath())
				return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
			}
			log.Errorf("Failed to subscribe to profile by ip %q: %q", dest.GetPath(), err)
		}
		return err
	}
	err = s.getProfileByName(token, host, port, log, stream)
	if err != nil {
		var ise watcher.InvalidService
		if errors.As(err, &ise) {
			log.Debugf("Invalid service %s", dest.GetPath())
			return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
		}
		log.Errorf("Failed to subscribe to profile by name %q: %q", dest.GetPath(), err)
	}
	return err
}

If you inspect the logs, you’ll see references to these behaviors. After deploying a new pod (its IP will be 10.23.0.65), you should see output similar to the following:

time="2025-06-16T05:01:32Z" level=debug msg="Dest token: \"{\\\"ns\\\":\\\"linkerd\\\", \\\"nodeName\\\":\\\"k3d-01-server-0\\\", \\\"pod\\\":\\\"linkerd-identity-759dfd7dfc-6qf25\\\"}\\n\"" addr=":8086" component=server remote="10.23.0.47:42728"
time="2025-06-16T05:01:32Z" level=debug msg="Getting profile for linkerd-identity-headless.linkerd.svc.cluster.local:8080" addr=":8086" component=server context-ns=linkerd context-pod=linkerd-identity-759dfd7dfc-6qf25 remote="10.23.0.47:42728"
time="2025-06-16T05:01:32Z" level=debug msg="Starting watch on service linkerd/linkerd-identity-headless" addr=":8086" component=opaque-ports-watcher
time="2025-06-16T05:01:32Z" level=debug msg="Establishing watch on profile linkerd/linkerd-identity-headless.linkerd.svc.cluster.local" addr=":8086" component=profile-watcher
time="2025-06-16T05:01:32Z" level=debug msg="Waiting for primary profile listener to be initialized" addr=":8086" component=server context-ns=linkerd context-pod=linkerd-identity-759dfd7dfc-6qf25 ns=linkerd port=8080 remote="10.23.0.47:42728" svc=linkerd-identity-headless
time="2025-06-16T05:01:32Z" level=debug msg="Establishing watch on profile linkerd/linkerd-identity-headless.linkerd.svc.cluster.local" addr=":8086" component=profile-watcher
time="2025-06-16T05:01:32Z" level=debug msg="Publishing primary profile" addr=":8086" component=server context-ns=linkerd context-pod=linkerd-identity-759dfd7dfc-6qf25 ns=linkerd port=8080 remote="10.23.0.47:42728" svc=linkerd-identity-headless
time="2025-06-16T05:01:32Z" level=debug msg="Using default profile"
time="2025-06-16T05:01:32Z" level=debug msg="Sending profile update: fully_qualified_name:\"linkerd-identity-headless.linkerd.svc.cluster.local\" retry_budget:{retry_ratio:0.2 min_retries_per_second:10 ttl:{seconds:10}} parent_ref:{resource:{group:\"core\" kind:\"Service\" name:\"linkerd-identity-headless\" namespace:\"linkerd\" port:8080}} profile_ref:{resource:{group:\"linkerd.io\"}}" addr=":8086" component=profile-translator context-ns=linkerd context-pod=linkerd-identity-759dfd7dfc-6qf25 ns=linkerd port=8080 remote="10.23.0.47:42728" svc=linkerd-identity-headless
time="2025-06-16T05:01:33Z" level=info msg="PUT https://10.247.0.1:443/apis/coordination.k8s.io/v1/namespaces/linkerd/leases/linkerd-destination-endpoint-write 200 OK in 2 milliseconds"
time="2025-06-16T05:01:33Z" level=debug msg="Dest token: \"{\\\"ns\\\":\\\"linkerd-buoyant\\\", \\\"nodeName\\\":\\\"k3d-01-server-0\\\", \\\"pod\\\":\\\"buoyant-cloud-metrics-qpzq6\\\"}\\n\"" addr=":8086" component=server remote="10.23.0.47:53780"
time="2025-06-16T05:01:33Z" level=debug msg="Getting profile for 10.23.0.65:4191" addr=":8086" component=server context-ns=linkerd-buoyant context-pod=buoyant-cloud-metrics-qpzq6 remote="10.23.0.47:53780"
time="2025-06-16T05:01:33Z" level=debug msg="Establishing watch on workload 10.23.0.65:4191" addr=":8086" component=workload-watcher
time="2025-06-16T05:01:33Z" level=debug msg="found 10.23.0.65 on the pod network" addr=":8086" component=workload-watcher
time="2025-06-16T05:01:33Z" level=debug msg="Created endpoint: addr:{ip:{ipv4:169279553} port:4191} weight:10000" addr=":8086" component=endpoint-profile-translator context-ns=linkerd-buoyant context-pod=buoyant-cloud-metrics-qpzq6 remote="10.23.0.47:53780"
time="2025-06-16T05:01:33Z" level=debug msg="Sending profile update: retry_budget:{retry_ratio:0.2 min_retries_per_second:10 ttl:{seconds:10}} endpoint:{addr:{ip:{ipv4:169279553} port:4191} weight:10000}" addr=":8086" component=endpoint-profile-translator context-ns=linkerd-buoyant context-pod=buoyant-cloud-metrics-qpzq6 remote="10.23.0.47:53780"
time="2025-06-16T05:01:35Z" level=info msg="PUT https://10.247.0.1:443/apis/coordination.k8s.io/v1/namespaces/linkerd/leases/linkerd-destination-endpoint-write 200 OK in 3 milliseconds"
time="2025-06-16T05:01:37Z" level=info msg="PUT https://10.247.0.1:443/apis/coordination.k8s.io/v1/namespaces/linkerd/leases/linkerd-destination-endpoint-write 200 OK in 3 milliseconds"
time="2025-06-16T05:01:37Z" level=debug msg="Pod curl-test.simple-app started running" addr=":8086" component=workload-publisher ip=10.23.0.65 port=4191
time="2025-06-16T05:01:37Z" level=debug msg="Created endpoint: addr:{ip:{ipv4:169279553} port:4191} weight:10000 metric_labels:{key:\"control_plane_ns\" value:\"linkerd\"} metric_labels:{key:\"namespace\" value:\"simple-app\"} metric_labels:{key:\"pod\" value:\"curl-test\"} metric_labels:{key:\"serviceaccount\" value:\"default\"} metric_labels:{key:\"zone\" value:\"\"} tls_identity:{dns_like_identity:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"} server_name:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"}} protocol_hint:{h2:{}}" addr=":8086" component=endpoint-profile-translator context-ns=linkerd-buoyant context-pod=buoyant-cloud-metrics-qpzq6 remote="10.23.0.47:53780"
time="2025-06-16T05:01:37Z" level=debug msg="Sending profile update: retry_budget:{retry_ratio:0.2 min_retries_per_second:10 ttl:{seconds:10}} endpoint:{addr:{ip:{ipv4:169279553} port:4191} weight:10000 metric_labels:{key:\"control_plane_ns\" value:\"linkerd\"} metric_labels:{key:\"namespace\" value:\"simple-app\"} metric_labels:{key:\"pod\" value:\"curl-test\"} metric_labels:{key:\"serviceaccount\" value:\"default\"} metric_labels:{key:\"zone\" value:\"\"} tls_identity:{dns_like_identity:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"} server_name:{name:\"default.simple-app.serviceaccount.identity.linkerd.cluster.local\"}} protocol_hint:{h2:{}}}" addr=":8086" component=endpoint-profile-translator context-ns=linkerd-buoyant context-pod=buoyant-cloud-metrics-qpzq6 remote="10.23.0.47:53780"

Metrics

The destination controller exposes an extensive set of metrics, which can be inspected by sending a GET request to the /metrics endpoint on port 9996.

kubectl -n linkerd port-forward deploy/linkerd-destination 9996
curl -s http://localhost:9996/metrics

The destination controller will emit metrics related to the cluster store.

# HELP cluster_store_size The number of linked clusters in the remote discovery cluster store
# TYPE cluster_store_size gauge
cluster_store_size 0

The destination controller will emit metrics related to the number of RPCs sent, received, and completed by the Server.


# HELP grpc_server_handled_total Total number of RPCs completed on the server, regardless of success or failure.
# TYPE grpc_server_handled_total counter
grpc_server_handled_total{grpc_code="OK",grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 318
grpc_server_handled_total{grpc_code="OK",grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 318

# HELP grpc_server_handling_seconds Histogram of response latency (seconds) of gRPC that had been application-level handled by the server.
# TYPE grpc_server_handling_seconds histogram
grpc_server_handling_seconds_bucket{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream",le="0.005"} 0
...
grpc_server_handling_seconds_sum{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 8135.809845959998
grpc_server_handling_seconds_count{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 318
grpc_server_handling_seconds_bucket{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream",le="0.005"} 0
... 
grpc_server_handling_seconds_sum{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 9666.61187774301
grpc_server_handling_seconds_count{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 318

# HELP grpc_server_msg_received_total Total number of RPC stream messages received on the server.
# TYPE grpc_server_msg_received_total counter
grpc_server_msg_received_total{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 325
grpc_server_msg_received_total{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 328

# HELP grpc_server_msg_sent_total Total number of gRPC stream messages sent by the server.
# TYPE grpc_server_msg_sent_total counter
grpc_server_msg_sent_total{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 319
grpc_server_msg_sent_total{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 328

# HELP grpc_server_started_total Total number of RPCs started on the server.
# TYPE grpc_server_started_total counter
grpc_server_started_total{grpc_method="Get",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 325
grpc_server_started_total{grpc_method="GetProfile",grpc_service="io.linkerd.proxy.destination.Destination",grpc_type="server_stream"} 328

Destination gRPC Metrics

The destination controller will emit metrics related to each informer with a gauge named <kind>_cache_size that reports the current number of items in that informer’s cache.

# HELP endpoints_cache_size Number of items in the client-go endpoints cache
# TYPE endpoints_cache_size gauge
endpoints_cache_size{cluster="local"} 26

# HELP job_cache_size Number of items in the client-go job cache
# TYPE job_cache_size gauge
job_cache_size{cluster="local"} 0

# HELP node_cache_size Number of items in the client-go node cache
# TYPE node_cache_size gauge
node_cache_size{cluster="local"} 4

# HELP pod_cache_size Number of items in the client-go pod cache
# TYPE pod_cache_size gauge
pod_cache_size{cluster="local"} 24

# HELP replicaset_cache_size Number of items in the client-go replicaset cache
# TYPE replicaset_cache_size gauge
replicaset_cache_size{cluster="local"} 24

# HELP server_cache_size Number of items in the client-go server cache
# TYPE server_cache_size gauge
server_cache_size{cluster="local"} 0

# HELP service_cache_size Number of items in the client-go service cache
# TYPE service_cache_size gauge
service_cache_size{cluster="local"} 26

# HELP serviceprofile_cache_size Number of items in the client-go serviceprofile cache
# TYPE serviceprofile_cache_size gauge
serviceprofile_cache_size{cluster="local"} 0

Destination Cache Metrics

The watchers will also expose metrics on the lag (in seconds) between the last update to a specific object and its processing by the informer, with a histogram named <kind>_informer_lag_seconds.

# HELP endpoints_informer_lag_seconds The amount of time between when an Endpoints resource is updated and when an informer observes it
# TYPE endpoints_informer_lag_seconds histogram
endpoints_informer_lag_seconds_bucket{le="0.5"} 0
...
endpoints_informer_lag_seconds_sum 0
endpoints_informer_lag_seconds_count 0

# HELP endpointslices_informer_lag_seconds The amount of time between when an EndpointSlice resource is updated and when an informer observes it
# TYPE endpointslices_informer_lag_seconds histogram
endpointslices_informer_lag_seconds_bucket{le="0.5"} 4
...
endpointslices_informer_lag_seconds_sum 30.301216474000004
endpointslices_informer_lag_seconds_count 42

# HELP externalworkload_cache_size Number of items in the client-go externalworkload cache
# TYPE externalworkload_cache_size gauge
externalworkload_cache_size{cluster="local"} 0

# HELP externalworkload_informer_lag_seconds The amount of time between when an ExternalWorkload resource is updated and when an informer observes it
# TYPE externalworkload_informer_lag_seconds histogram
externalworkload_informer_lag_seconds_bucket{le="0.5"} 0
...
externalworkload_informer_lag_seconds_sum 0
externalworkload_informer_lag_seconds_count 0

# HELP pods_informer_lag_seconds The amount of time between when a Pod resource is updated and when an informer observes it
# TYPE pods_informer_lag_seconds histogram
pods_informer_lag_seconds_bucket{le="0.5"} 6
...
pods_informer_lag_seconds_sum 41.22129725400001
pods_informer_lag_seconds_count 56

# HELP servers_informer_lag_seconds The amount of time between when a Server resource is updated and when an informer observes it
# TYPE servers_informer_lag_seconds histogram
servers_informer_lag_seconds_bucket{le="0.5"} 0
...
servers_informer_lag_seconds_sum 0
servers_informer_lag_seconds_count 0

Destination Informer Lag Metrics

The Linkerd controllers all use the github.com/prometheus/client_golang/prometheus/promhttp module, which also allows them to expose the standard Go runtime and process metrics.

# HELP go_gc_duration_seconds A summary of the wall-time pause (stop-the-world) duration in garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 3.1001e-05
go_gc_duration_seconds_sum 0.007744916
go_gc_duration_seconds_count 24

# HELP go_gc_gogc_percent Heap size target percentage configured by the user, otherwise 100. This value is set by the GOGC environment variable, and the runtime/debug.SetGCPercent function. Sourced from /gc/gogc:percent
# TYPE go_gc_gogc_percent gauge
go_gc_gogc_percent 100

# HELP go_gc_gomemlimit_bytes Go runtime memory limit configured by the user, otherwise math.MaxInt64. This value is set by the GOMEMLIMIT environment variable, and the runtime/debug.SetMemoryLimit function. Sourced from /gc/gomemlimit:bytes
# TYPE go_gc_gomemlimit_bytes gauge
go_gc_gomemlimit_bytes 9.223372036854776e+18

# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 187

# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.23.5"} 1

# HELP go_memstats_alloc_bytes Number of bytes allocated in heap and currently in use. Equals to /memory/classes/heap/objects:bytes.
# TYPE go_memstats_alloc_bytes gauge
go_memstats_alloc_bytes 1.6198776e+07

# HELP go_memstats_alloc_bytes_total Total number of bytes allocated in heap until now, even if released already. Equals to /gc/heap/allocs:bytes.
# TYPE go_memstats_alloc_bytes_total counter
go_memstats_alloc_bytes_total 1.56505128e+08

# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. Equals to /memory/classes/profiling/buckets:bytes.
# TYPE go_memstats_buck_hash_sys_bytes gauge
go_memstats_buck_hash_sys_bytes 1.515081e+06

# HELP go_memstats_frees_total Total number of heap objects frees. Equals to /gc/heap/frees:objects + /gc/heap/tiny/allocs:objects.
# TYPE go_memstats_frees_total counter
go_memstats_frees_total 1.366899e+06

# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. Equals to /memory/classes/metadata/other:bytes.
# TYPE go_memstats_gc_sys_bytes gauge
go_memstats_gc_sys_bytes 4.032768e+06

# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and currently in use, same as go_memstats_alloc_bytes. Equals to /memory/classes/heap/objects:bytes.
# TYPE go_memstats_heap_alloc_bytes gauge
go_memstats_heap_alloc_bytes 1.6198776e+07

# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. Equals to /memory/classes/heap/released:bytes + /memory/classes/heap/free:bytes.
# TYPE go_memstats_heap_idle_bytes gauge
go_memstats_heap_idle_bytes 9.633792e+06

# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. Equals to /memory/classes/heap/objects:bytes + /memory/classes/heap/unused:bytes
# TYPE go_memstats_heap_inuse_bytes gauge
go_memstats_heap_inuse_bytes 2.1397504e+07

# HELP go_memstats_heap_objects Number of currently allocated objects. Equals to /gc/heap/objects:objects.
# TYPE go_memstats_heap_objects gauge
go_memstats_heap_objects 82595

# HELP go_memstats_heap_released_bytes Number of heap bytes released to OS. Equals to /memory/classes/heap/released:bytes.
# TYPE go_memstats_heap_released_bytes gauge
go_memstats_heap_released_bytes 8.372224e+06

# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. Equals to /memory/classes/heap/objects:bytes + /memory/classes/heap/unused:bytes + /memory/classes/heap/released:bytes + /memory/classes/heap/free:bytes.
# TYPE go_memstats_heap_sys_bytes gauge
go_memstats_heap_sys_bytes 3.1031296e+07

# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection.
# TYPE go_memstats_last_gc_time_seconds gauge
go_memstats_last_gc_time_seconds 1.749141896403e+09

# HELP go_memstats_mallocs_total Total number of heap objects allocated, both live and gc-ed. Semantically a counter version for go_memstats_heap_objects gauge. Equals to /gc/heap/allocs:objects + /gc/heap/tiny/allocs:objects.
# TYPE go_memstats_mallocs_total counter
go_memstats_mallocs_total 1.449494e+06

# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. Equals to /memory/classes/metadata/mcache/inuse:bytes.
# TYPE go_memstats_mcache_inuse_bytes gauge
go_memstats_mcache_inuse_bytes 16800

# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. Equals to /memory/classes/metadata/mcache/inuse:bytes + /memory/classes/metadata/mcache/free:bytes.
# TYPE go_memstats_mcache_sys_bytes gauge
go_memstats_mcache_sys_bytes 31200

# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. Equals to /memory/classes/metadata/mspan/inuse:bytes.
# TYPE go_memstats_mspan_inuse_bytes gauge
go_memstats_mspan_inuse_bytes 324640

# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. Equals to /memory/classes/metadata/mspan/inuse:bytes + /memory/classes/metadata/mspan/free:bytes.
# TYPE go_memstats_mspan_sys_bytes gauge
go_memstats_mspan_sys_bytes 359040

# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. Equals to /gc/heap/goal:bytes.
# TYPE go_memstats_next_gc_bytes gauge
go_memstats_next_gc_bytes 2.2538656e+07

# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. Equals to /memory/classes/other:bytes.
# TYPE go_memstats_other_sys_bytes gauge
go_memstats_other_sys_bytes 1.325407e+06

# HELP go_memstats_stack_inuse_bytes Number of bytes obtained from system for stack allocator in non-CGO environments. Equals to /memory/classes/heap/stacks:bytes.
# TYPE go_memstats_stack_inuse_bytes gauge
go_memstats_stack_inuse_bytes 2.424832e+06

# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. Equals to /memory/classes/heap/stacks:bytes + /memory/classes/os-stacks:bytes.
# TYPE go_memstats_stack_sys_bytes gauge
go_memstats_stack_sys_bytes 2.424832e+06

# HELP go_memstats_sys_bytes Number of bytes obtained from system. Equals to /memory/classes/total:byte.
# TYPE go_memstats_sys_bytes gauge
go_memstats_sys_bytes 4.0719624e+07

# HELP go_sched_gomaxprocs_threads The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously. Sourced from /sched/gomaxprocs:threads
# TYPE go_sched_gomaxprocs_threads gauge
go_sched_gomaxprocs_threads 14

# HELP go_threads Number of OS threads created.
# TYPE go_threads gauge
go_threads 14

# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 4.15

# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 65536

# HELP process_network_receive_bytes_total Number of bytes received by the process over the network.
# TYPE process_network_receive_bytes_total counter
process_network_receive_bytes_total 1.1785203e+07

# HELP process_network_transmit_bytes_total Number of bytes sent by the process over the network.
# TYPE process_network_transmit_bytes_total counter
process_network_transmit_bytes_total 1.2693481e+07

# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 18

# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 7.45472e+07

# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.7491402229e+09

# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 5.612982272e+09

# HELP process_virtual_memory_max_bytes Maximum amount of virtual memory available in bytes.
# TYPE process_virtual_memory_max_bytes gauge
process_virtual_memory_max_bytes 1.8446744073709552e+19

# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served.
# TYPE promhttp_metric_handler_requests_in_flight gauge
promhttp_metric_handler_requests_in_flight 1

# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code.
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 170
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0

Destination Go Metrics

References