summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Waldon <brian.waldon@rackspace.com>2011-09-08 15:40:30 -0400
committerBrian Waldon <brian.waldon@rackspace.com>2011-09-08 15:40:30 -0400
commit71d0b4644aa16c2aa792ea47c3b6781ff3252423 (patch)
tree83b03b81312a2d63f86791b5db6fd393ce9ce254
parentb24189ad5df28ae9acfa8fc5feaa8971e676343e (diff)
parent816ae51f6580e3ed20597157f94a4ffd4e690c04 (diff)
merging trunk
-rw-r--r--.mailmap1
-rw-r--r--Authors1
-rwxr-xr-xbin/nova-manage41
-rw-r--r--nova/api/openstack/views/addresses.py1
-rw-r--r--nova/db/api.py10
-rw-r--r--nova/db/sqlalchemy/api.py29
-rw-r--r--nova/db/sqlalchemy/migrate_repo/versions/045_add_network_priority.py44
-rw-r--r--nova/db/sqlalchemy/models.py1
-rw-r--r--nova/exception.py4
-rw-r--r--nova/network/manager.py40
-rw-r--r--nova/network/quantum/__init__.py16
-rw-r--r--nova/network/quantum/client.py307
-rw-r--r--nova/network/quantum/manager.py324
-rw-r--r--nova/network/quantum/melange_connection.py141
-rw-r--r--nova/network/quantum/melange_ipam_lib.py205
-rw-r--r--nova/network/quantum/nova_ipam_lib.py195
-rw-r--r--nova/network/quantum/quantum_connection.py118
-rw-r--r--nova/scheduler/base_scheduler.py16
-rw-r--r--nova/tests/scheduler/test_abstract_scheduler.py33
-rw-r--r--nova/tests/test_network.py7
-rw-r--r--nova/tests/test_quantum.py323
-rw-r--r--nova/tests/test_xenapi.py2
-rw-r--r--nova/virt/libvirt/vif.py2
-rw-r--r--nova/virt/xenapi/vmops.py13
24 files changed, 1843 insertions, 31 deletions
diff --git a/.mailmap b/.mailmap
index 5c8df80e0..f2f59d81b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -15,6 +15,7 @@
<code@term.ie> <termie@preciousroy.local>
<corywright@gmail.com> <cory.wright@rackspace.com>
<dan@nicira.com> <danwent@dan-xs3-cs>
+<dan@nicira.com> danwent@gmail.com
<devin.carlen@gmail.com> <devcamcar@illian.local>
<ewan.mellor@citrix.com> <emellor@silver>
<itoumsn@nttdata.co.jp> <itoumsn@shayol>
diff --git a/Authors b/Authors
index f647a7c00..a6598103f 100644
--- a/Authors
+++ b/Authors
@@ -11,6 +11,7 @@ Antony Messerli <ant@openstack.org>
Armando Migliaccio <Armando.Migliaccio@eu.citrix.com>
Arvind Somya <asomya@cisco.com>
Bilal Akhtar <bilalakhtar@ubuntu.com>
+Brad Hall <brad@nicira.com>
Brian Lamar <brian.lamar@rackspace.com>
Brian Schott <bschott@isi.edu>
Brian Waldon <brian.waldon@rackspace.com>
diff --git a/bin/nova-manage b/bin/nova-manage
index c3b2c71ce..bc191b2f0 100755
--- a/bin/nova-manage
+++ b/bin/nova-manage
@@ -59,11 +59,11 @@ import glob
import json
import math
import netaddr
+from optparse import OptionParser
import os
import sys
import time
-from optparse import OptionParser
# If ../nova/__init__.py exists, add ../ to Python search path, so that
# it will override what happens to be installed in /usr/(local/)lib/python...
@@ -685,10 +685,17 @@ class NetworkCommands(object):
help='Multi host')
@args('--dns1', dest="dns1", metavar="<DNS Address>", help='First DNS')
@args('--dns2', dest="dns2", metavar="<DNS Address>", help='Second DNS')
+ @args('--uuid', dest="net_uuid", metavar="<network uuid>",
+ help='Network UUID')
+ @args('--project_id', dest="project_id", metavar="<project id>",
+ help='Project id')
+ @args('--priority', dest="priority", metavar="<number>",
+ help='Network interface priority')
def create(self, label=None, fixed_range_v4=None, num_networks=None,
network_size=None, multi_host=None, vlan_start=None,
vpn_start=None, fixed_range_v6=None, gateway_v6=None,
- bridge=None, bridge_interface=None, dns1=None, dns2=None):
+ bridge=None, bridge_interface=None, dns1=None, dns2=None,
+ project_id=None, priority=None, uuid=None):
"""Creates fixed ips for host by range"""
# check for certain required inputs
@@ -765,7 +772,10 @@ class NetworkCommands(object):
bridge=bridge,
bridge_interface=bridge_interface,
dns1=dns1,
- dns2=dns2)
+ dns2=dns2,
+ project_id=project_id,
+ priority=priority,
+ uuid=uuid)
def list(self):
"""List all created networks"""
@@ -790,16 +800,29 @@ class NetworkCommands(object):
network.project_id,
network.uuid)
+ def quantum_list(self):
+ """List all created networks with Quantum-relevant fields"""
+ _fmt = "%-32s\t%-10s\t%-10s\t%s , %s"
+ print _fmt % (_('uuid'),
+ _('project'),
+ _('priority'),
+ _('cidr_v4'),
+ _('cidr_v6'))
+ for network in db.network_get_all(context.get_admin_context()):
+ print _fmt % (network.uuid,
+ network.project_id,
+ network.priority,
+ network.cidr,
+ network.cidr_v6)
+
@args('--network', dest="fixed_range", metavar='<x.x.x.x/yy>',
help='Network to delete')
def delete(self, fixed_range):
"""Deletes a network"""
- network = db.network_get_by_cidr(context.get_admin_context(), \
- fixed_range)
- if network.project_id is not None:
- raise ValueError(_('Network must be disassociated from project %s'
- ' before delete' % network.project_id))
- db.network_delete_safe(context.get_admin_context(), network.id)
+
+ # delete the network
+ net_manager = utils.import_object(FLAGS.network_manager)
+ net_manager.delete_network(context.get_admin_context(), fixed_range)
@args('--network', dest="fixed_range", metavar='<x.x.x.x/yy>',
help='Network to modify')
diff --git a/nova/api/openstack/views/addresses.py b/nova/api/openstack/views/addresses.py
index 8f07a2289..8d38bc9c3 100644
--- a/nova/api/openstack/views/addresses.py
+++ b/nova/api/openstack/views/addresses.py
@@ -88,7 +88,6 @@ class ViewBuilderV11(ViewBuilder):
try:
return interface['network']['label']
except (TypeError, KeyError) as exc:
- LOG.exception(exc)
raise TypeError
def _extract_ipv4_addresses(self, interface):
diff --git a/nova/db/api.py b/nova/db/api.py
index 148887635..c03a86671 100644
--- a/nova/db/api.py
+++ b/nova/db/api.py
@@ -420,6 +420,11 @@ def virtual_interface_get_by_address(context, address):
return IMPL.virtual_interface_get_by_address(context, address)
+def virtual_interface_get_by_uuid(context, vif_uuid):
+ """Gets a virtual interface from the table filtering on vif uuid."""
+ return IMPL.virtual_interface_get_by_uuid(context, vif_uuid)
+
+
def virtual_interface_get_by_fixed_ip(context, fixed_ip_id):
"""Gets the virtual interface fixed_ip is associated with."""
return IMPL.virtual_interface_get_by_fixed_ip(context, fixed_ip_id)
@@ -715,6 +720,11 @@ def network_get_by_bridge(context, bridge):
return IMPL.network_get_by_bridge(context, bridge)
+def network_get_by_uuid(context, uuid):
+ """Get a network by uuid or raise if it does not exist."""
+ return IMPL.network_get_by_uuid(context, uuid)
+
+
def network_get_by_cidr(context, cidr):
"""Get a network by cidr or raise if it does not exist"""
return IMPL.network_get_by_cidr(context, cidr)
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py
index b99667afc..523258841 100644
--- a/nova/db/sqlalchemy/api.py
+++ b/nova/db/sqlalchemy/api.py
@@ -945,6 +945,22 @@ def virtual_interface_get_by_address(context, address):
@require_context
+def virtual_interface_get_by_uuid(context, vif_uuid):
+ """Gets a virtual interface from the table.
+
+ :param vif_uuid: the uuid of the interface you're looking to get
+ """
+ session = get_session()
+ vif_ref = session.query(models.VirtualInterface).\
+ filter_by(uuid=vif_uuid).\
+ options(joinedload('network')).\
+ options(joinedload('instance')).\
+ options(joinedload('fixed_ips')).\
+ first()
+ return vif_ref
+
+
+@require_context
def virtual_interface_get_by_fixed_ip(context, fixed_ip_id):
"""Gets the virtual interface fixed_ip is associated with.
@@ -1858,6 +1874,19 @@ def network_get_by_bridge(context, bridge):
@require_admin_context
+def network_get_by_uuid(context, uuid):
+ session = get_session()
+ result = session.query(models.Network).\
+ filter_by(uuid=uuid).\
+ filter_by(deleted=False).\
+ first()
+
+ if not result:
+ raise exception.NetworkNotFoundForUUID(uuid=uuid)
+ return result
+
+
+@require_admin_context
def network_get_by_cidr(context, cidr):
session = get_session()
result = session.query(models.Network).\
diff --git a/nova/db/sqlalchemy/migrate_repo/versions/045_add_network_priority.py b/nova/db/sqlalchemy/migrate_repo/versions/045_add_network_priority.py
new file mode 100644
index 000000000..b9b0ea37c
--- /dev/null
+++ b/nova/db/sqlalchemy/migrate_repo/versions/045_add_network_priority.py
@@ -0,0 +1,44 @@
+# Copyright 2011 Nicira, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from sqlalchemy import *
+from migrate import *
+
+from nova import log as logging
+from nova import utils
+
+
+meta = MetaData()
+
+networks = Table('networks', meta,
+ Column("id", Integer(), primary_key=True, nullable=False))
+
+# Add priority column to networks table
+priority = Column('priority', Integer())
+
+
+def upgrade(migrate_engine):
+ meta.bind = migrate_engine
+
+ try:
+ networks.create_column(priority)
+ except Exception:
+ logging.error(_("priority column not added to networks table"))
+ raise
+
+
+def downgrade(migrate_engine):
+ meta.bind = migrate_engine
+ networks.drop_column(priority)
diff --git a/nova/db/sqlalchemy/models.py b/nova/db/sqlalchemy/models.py
index 854034f12..211049112 100644
--- a/nova/db/sqlalchemy/models.py
+++ b/nova/db/sqlalchemy/models.py
@@ -628,6 +628,7 @@ class Network(BASE, NovaBase):
dhcp_start = Column(String(255))
project_id = Column(String(255))
+ priority = Column(Integer)
host = Column(String(255)) # , ForeignKey('hosts.id'))
uuid = Column(String(36))
diff --git a/nova/exception.py b/nova/exception.py
index 95d8229b5..8e73fdfc8 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -435,6 +435,10 @@ class NetworkNotFoundForBridge(NetworkNotFound):
message = _("Network could not be found for bridge %(bridge)s")
+class NetworkNotFoundForUUID(NetworkNotFound):
+ message = _("Network could not be found for uuid %(uuid)s")
+
+
class NetworkNotFoundForCidr(NetworkNotFound):
message = _("Network could not be found with cidr %(cidr)s.")
diff --git a/nova/network/manager.py b/nova/network/manager.py
index 050cec250..a7bdcbc30 100644
--- a/nova/network/manager.py
+++ b/nova/network/manager.py
@@ -74,7 +74,7 @@ flags.DEFINE_string('flat_network_bridge', None,
'Bridge for simple network instances')
flags.DEFINE_string('flat_network_dns', '8.8.4.4',
'Dns for simple network')
-flags.DEFINE_bool('flat_injected', True,
+flags.DEFINE_bool('flat_injected', False,
'Whether to attempt to inject network setup into guest')
flags.DEFINE_string('flat_interface', None,
'FlatDhcp will bridge into this interface if set')
@@ -484,6 +484,9 @@ class NetworkManager(manager.SchedulerDependentManager):
for vif in vifs:
network = vif['network']
+ if network is None:
+ continue
+
# determine which of the instance's IPs belong to this network
network_IPs = [fixed_ip['address'] for fixed_ip in fixed_ips if
fixed_ip['network_id'] == network['id']]
@@ -546,21 +549,23 @@ class NetworkManager(manager.SchedulerDependentManager):
def _allocate_mac_addresses(self, context, instance_id, networks):
"""Generates mac addresses and creates vif rows in db for them."""
for network in networks:
- vif = {'address': self.generate_mac_address(),
+ self.add_virtual_interface(context, instance_id, network['id'])
+
+ def add_virtual_interface(self, context, instance_id, network_id):
+ vif = {'address': self.generate_mac_address(),
'instance_id': instance_id,
- 'network_id': network['id'],
+ 'network_id': network_id,
'uuid': str(utils.gen_uuid())}
- # try FLAG times to create a vif record with a unique mac_address
- for i in range(FLAGS.create_unique_mac_address_attempts):
- try:
- self.db.virtual_interface_create(context, vif)
- break
- except exception.VirtualInterfaceCreateException:
- vif['address'] = self.generate_mac_address()
- else:
- self.db.virtual_interface_delete_by_instance(context,
+ # try FLAG times to create a vif record with a unique mac_address
+ for _ in xrange(FLAGS.create_unique_mac_address_attempts):
+ try:
+ return self.db.virtual_interface_create(context, vif)
+ except exception.VirtualInterfaceCreateException:
+ vif['address'] = self.generate_mac_address()
+ else:
+ self.db.virtual_interface_delete_by_instance(context,
instance_id)
- raise exception.VirtualInterfaceMacAddressException()
+ raise exception.VirtualInterfaceMacAddressException()
def generate_mac_address(self):
"""Generate an Ethernet MAC address."""
@@ -789,6 +794,15 @@ class NetworkManager(manager.SchedulerDependentManager):
self._create_fixed_ips(context, network['id'])
return networks
+ def delete_network(self, context, fixed_range, require_disassociated=True):
+
+ network = db.network_get_by_cidr(context, fixed_range)
+
+ if require_disassociated and network.project_id is not None:
+ raise ValueError(_('Network must be disassociated from project %s'
+ ' before delete' % network.project_id))
+ db.network_delete_safe(context, network.id)
+
@property
def _bottom_reserved_ips(self): # pylint: disable=R0201
"""Number of reserved ips at the bottom of the range."""
diff --git a/nova/network/quantum/__init__.py b/nova/network/quantum/__init__.py
new file mode 100644
index 000000000..f7fbfb511
--- /dev/null
+++ b/nova/network/quantum/__init__.py
@@ -0,0 +1,16 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira Networks
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
diff --git a/nova/network/quantum/client.py b/nova/network/quantum/client.py
new file mode 100644
index 000000000..40c68dfdc
--- /dev/null
+++ b/nova/network/quantum/client.py
@@ -0,0 +1,307 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Citrix Systems
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+# @author: Tyler Smith, Cisco Systems
+
+import httplib
+import json
+import socket
+import urllib
+
+
+# FIXME(danwent): All content in this file should be removed once the
+# packaging work for the quantum client libraries is complete.
+# At that point, we will be able to just install the libraries as a
+# dependency and import from quantum.client.* and quantum.common.*
+# Until then, we have simplified versions of these classes in this file.
+
+class JSONSerializer(object):
+ """This is a simple json-only serializer to use until we can just grab
+ the standard serializer from the quantum library.
+ """
+ def serialize(self, data, content_type):
+ try:
+ return json.dumps(data)
+ except TypeError:
+ pass
+ return json.dumps(to_primitive(data))
+
+ def deserialize(self, data, content_type):
+ return json.loads(data)
+
+
+# The full client lib will expose more
+# granular exceptions, for now, just try to distinguish
+# between the cases we care about.
+class QuantumNotFoundException(Exception):
+ """Indicates that Quantum Server returned 404"""
+ pass
+
+
+class QuantumServerException(Exception):
+ """Indicates any non-404 error from Quantum Server"""
+ pass
+
+
+class QuantumIOException(Exception):
+ """Indicates network IO trouble reaching Quantum Server"""
+ pass
+
+
+class api_call(object):
+ """A Decorator to add support for format and tenant overriding"""
+ def __init__(self, func):
+ self.func = func
+
+ def __get__(self, instance, owner):
+ def with_params(*args, **kwargs):
+ """Temporarily set format and tenant for this request"""
+ (format, tenant) = (instance.format, instance.tenant)
+
+ if 'format' in kwargs:
+ instance.format = kwargs['format']
+ if 'tenant' in kwargs:
+ instance.tenant = kwargs['tenant']
+
+ ret = None
+ try:
+ ret = self.func(instance, *args)
+ finally:
+ (instance.format, instance.tenant) = (format, tenant)
+ return ret
+ return with_params
+
+
+class Client(object):
+ """A base client class - derived from Glance.BaseClient"""
+
+ action_prefix = '/v1.0/tenants/{tenant_id}'
+
+ """Action query strings"""
+ networks_path = "/networks"
+ network_path = "/networks/%s"
+ ports_path = "/networks/%s/ports"
+ port_path = "/networks/%s/ports/%s"
+ attachment_path = "/networks/%s/ports/%s/attachment"
+
+ def __init__(self, host="127.0.0.1", port=9696, use_ssl=False, tenant=None,
+ format="xml", testing_stub=None, key_file=None,
+ cert_file=None, logger=None):
+ """Creates a new client to some service.
+
+ :param host: The host where service resides
+ :param port: The port where service resides
+ :param use_ssl: True to use SSL, False to use HTTP
+ :param tenant: The tenant ID to make requests with
+ :param format: The format to query the server with
+ :param testing_stub: A class that stubs basic server methods for tests
+ :param key_file: The SSL key file to use if use_ssl is true
+ :param cert_file: The SSL cert file to use if use_ssl is true
+ """
+ self.host = host
+ self.port = port
+ self.use_ssl = use_ssl
+ self.tenant = tenant
+ self.format = format
+ self.connection = None
+ self.testing_stub = testing_stub
+ self.key_file = key_file
+ self.cert_file = cert_file
+ self.logger = logger
+
+ def get_connection_type(self):
+ """Returns the proper connection type"""
+ if self.testing_stub:
+ return self.testing_stub
+ elif self.use_ssl:
+ return httplib.HTTPSConnection
+ else:
+ return httplib.HTTPConnection
+
+ def do_request(self, method, action, body=None,
+ headers=None, params=None):
+ """Connects to the server and issues a request.
+ Returns the result data, or raises an appropriate exception if
+ HTTP status code is not 2xx
+
+ :param method: HTTP method ("GET", "POST", "PUT", etc...)
+ :param body: string of data to send, or None (default)
+ :param headers: mapping of key/value pairs to add as headers
+ :param params: dictionary of key/value pairs to add to append
+ to action
+ """
+
+ # Ensure we have a tenant id
+ if not self.tenant:
+ raise Exception(_("Tenant ID not set"))
+
+ # Add format and tenant_id
+ action += ".%s" % self.format
+ action = Client.action_prefix + action
+ action = action.replace('{tenant_id}', self.tenant)
+
+ if type(params) is dict:
+ action += '?' + urllib.urlencode(params)
+
+ try:
+ connection_type = self.get_connection_type()
+ headers = headers or {"Content-Type":
+ "application/%s" % self.format}
+
+ # Open connection and send request, handling SSL certs
+ certs = {'key_file': self.key_file, 'cert_file': self.cert_file}
+ certs = dict((x, certs[x]) for x in certs if certs[x] != None)
+
+ if self.use_ssl and len(certs):
+ c = connection_type(self.host, self.port, **certs)
+ else:
+ c = connection_type(self.host, self.port)
+
+ if self.logger:
+ self.logger.debug(
+ _("Quantum Client Request:\n%(method)s %(action)s\n" %
+ locals()))
+ if body:
+ self.logger.debug(body)
+
+ c.request(method, action, body, headers)
+ res = c.getresponse()
+ status_code = self.get_status_code(res)
+ data = res.read()
+
+ if self.logger:
+ self.logger.debug("Quantum Client Reply (code = %s) :\n %s" \
+ % (str(status_code), data))
+
+ if status_code == httplib.NOT_FOUND:
+ raise QuantumNotFoundException(
+ _("Quantum entity not found: %s" % data))
+
+ if status_code in (httplib.OK,
+ httplib.CREATED,
+ httplib.ACCEPTED,
+ httplib.NO_CONTENT):
+ if data is not None and len(data):
+ return self.deserialize(data, status_code)
+ else:
+ raise QuantumServerException(
+ _("Server %(status_code)s error: %(data)s"
+ % locals()))
+
+ except (socket.error, IOError), e:
+ raise QuantumIOException(_("Unable to connect to "
+ "server. Got error: %s" % e))
+
+ def get_status_code(self, response):
+ """Returns the integer status code from the response, which
+ can be either a Webob.Response (used in testing) or httplib.Response
+ """
+ if hasattr(response, 'status_int'):
+ return response.status_int
+ else:
+ return response.status
+
+ def serialize(self, data):
+ if not data:
+ return None
+ elif type(data) is dict:
+ return JSONSerializer().serialize(data, self.content_type())
+ else:
+ raise Exception(_("unable to deserialize object of type = '%s'" %
+ type(data)))
+
+ def deserialize(self, data, status_code):
+ if status_code == 202:
+ return data
+ return JSONSerializer().deserialize(data, self.content_type())
+
+ def content_type(self, format=None):
+ if not format:
+ format = self.format
+ return "application/%s" % (format)
+
+ @api_call
+ def list_networks(self):
+ """Fetches a list of all networks for a tenant"""
+ return self.do_request("GET", self.networks_path)
+
+ @api_call
+ def show_network_details(self, network):
+ """Fetches the details of a certain network"""
+ return self.do_request("GET", self.network_path % (network))
+
+ @api_call
+ def create_network(self, body=None):
+ """Creates a new network"""
+ body = self.serialize(body)
+ return self.do_request("POST", self.networks_path, body=body)
+
+ @api_call
+ def update_network(self, network, body=None):
+ """Updates a network"""
+ body = self.serialize(body)
+ return self.do_request("PUT", self.network_path % (network), body=body)
+
+ @api_call
+ def delete_network(self, network):
+ """Deletes the specified network"""
+ return self.do_request("DELETE", self.network_path % (network))
+
+ @api_call
+ def list_ports(self, network):
+ """Fetches a list of ports on a given network"""
+ return self.do_request("GET", self.ports_path % (network))
+
+ @api_call
+ def show_port_details(self, network, port):
+ """Fetches the details of a certain port"""
+ return self.do_request("GET", self.port_path % (network, port))
+
+ @api_call
+ def create_port(self, network, body=None):
+ """Creates a new port on a given network"""
+ body = self.serialize(body)
+ return self.do_request("POST", self.ports_path % (network), body=body)
+
+ @api_call
+ def delete_port(self, network, port):
+ """Deletes the specified port from a network"""
+ return self.do_request("DELETE", self.port_path % (network, port))
+
+ @api_call
+ def set_port_state(self, network, port, body=None):
+ """Sets the state of the specified port"""
+ body = self.serialize(body)
+ return self.do_request("PUT",
+ self.port_path % (network, port), body=body)
+
+ @api_call
+ def show_port_attachment(self, network, port):
+ """Fetches the attachment-id associated with the specified port"""
+ return self.do_request("GET", self.attachment_path % (network, port))
+
+ @api_call
+ def attach_resource(self, network, port, body=None):
+ """Sets the attachment-id of the specified port"""
+ body = self.serialize(body)
+ return self.do_request("PUT",
+ self.attachment_path % (network, port), body=body)
+
+ @api_call
+ def detach_resource(self, network, port):
+ """Removes the attachment-id of the specified port"""
+ return self.do_request("DELETE",
+ self.attachment_path % (network, port))
diff --git a/nova/network/quantum/manager.py b/nova/network/quantum/manager.py
new file mode 100644
index 000000000..23a9aba0d
--- /dev/null
+++ b/nova/network/quantum/manager.py
@@ -0,0 +1,324 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira Networks, Inc
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import db
+from nova import exception
+from nova import flags
+from nova import log as logging
+from nova import manager
+from nova.network import manager
+from nova.network.quantum import quantum_connection
+from nova import utils
+
+LOG = logging.getLogger("nova.network.quantum.manager")
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('quantum_ipam_lib',
+ 'nova.network.quantum.nova_ipam_lib',
+ "Indicates underlying IP address management library")
+
+
+class QuantumManager(manager.FlatManager):
+ """NetworkManager class that communicates with a Quantum service
+ via a web services API to provision VM network connectivity.
+
+ For IP Address management, QuantumManager can be configured to
+ use either Nova's local DB or the Melange IPAM service.
+
+ Currently, the QuantumManager does NOT support any of the 'gateway'
+ functionality implemented by the Nova VlanManager, including:
+ * floating IPs
+ * DHCP
+ * NAT gateway
+
+ Support for these capabilities are targted for future releases.
+ """
+
+ def __init__(self, q_conn=None, ipam_lib=None, *args, **kwargs):
+ """Initialize two key libraries, the connection to a
+ Quantum service, and the library for implementing IPAM.
+
+ Calls inherited FlatManager constructor.
+ """
+
+ if not q_conn:
+ q_conn = quantum_connection.QuantumClientConnection()
+ self.q_conn = q_conn
+
+ if not ipam_lib:
+ ipam_lib = FLAGS.quantum_ipam_lib
+ self.ipam = utils.import_object(ipam_lib).get_ipam_lib(self)
+
+ super(QuantumManager, self).__init__(*args, **kwargs)
+
+ def create_networks(self, context, label, cidr, multi_host, num_networks,
+ network_size, cidr_v6, gateway_v6, bridge,
+ bridge_interface, dns1=None, dns2=None, uuid=None,
+ **kwargs):
+ """Unlike other NetworkManagers, with QuantumManager, each
+ create_networks calls should create only a single network.
+
+ Two scenarios exist:
+ - no 'uuid' is specified, in which case we contact
+ Quantum and create a new network.
+ - an existing 'uuid' is specified, corresponding to
+ a Quantum network created out of band.
+
+ In both cases, we initialize a subnet using the IPAM lib.
+ """
+ if num_networks != 1:
+ raise Exception(_("QuantumManager requires that only one"
+ " network is created per call"))
+ q_tenant_id = kwargs["project_id"] or FLAGS.quantum_default_tenant_id
+ quantum_net_id = uuid
+ if quantum_net_id:
+ if not self.q_conn.network_exists(q_tenant_id, quantum_net_id):
+ raise Exception(_("Unable to find existing quantum " \
+ " network for tenant '%(q_tenant_id)s' with "
+ "net-id '%(quantum_net_id)s'" % locals()))
+ else:
+ # otherwise, create network from default quantum pool
+ quantum_net_id = self.q_conn.create_network(q_tenant_id, label)
+
+ ipam_tenant_id = kwargs.get("project_id", None)
+ priority = kwargs.get("priority", 0)
+ self.ipam.create_subnet(context, label, ipam_tenant_id, quantum_net_id,
+ priority, cidr, gateway_v6, cidr_v6, dns1, dns2)
+
+ def delete_network(self, context, fixed_range):
+ """Lookup network by IPv4 cidr, delete both the IPAM
+ subnet and the corresponding Quantum network.
+ """
+ project_id = context.project_id
+ quantum_net_id = self.ipam.get_network_id_by_cidr(
+ context, fixed_range, project_id)
+ self.ipam.delete_subnets_by_net_id(context, quantum_net_id,
+ project_id)
+ q_tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ self.q_conn.delete_network(q_tenant_id, quantum_net_id)
+
+ def allocate_for_instance(self, context, **kwargs):
+ """Called by compute when it is creating a new VM.
+
+ There are three key tasks:
+ - Determine the number and order of vNICs to create
+ - Allocate IP addresses
+ - Create ports on a Quantum network and attach vNICs.
+
+ We support two approaches to determining vNICs:
+ - By default, a VM gets a vNIC for any network belonging
+ to the VM's project, and a vNIC for any "global" network
+ that has a NULL project_id. vNIC order is determined
+ by the network's 'priority' field.
+ - If the 'os-create-server-ext' was used to create the VM,
+ only the networks in 'requested_networks' are used to
+ create vNICs, and the vNIC order is determiend by the
+ order in the requested_networks array.
+
+ For each vNIC, use the FlatManager to create the entries
+ in the virtual_interfaces table, contact Quantum to
+ create a port and attachment the vNIC, and use the IPAM
+ lib to allocate IP addresses.
+ """
+ instance_id = kwargs.pop('instance_id')
+ instance_type_id = kwargs['instance_type_id']
+ host = kwargs.pop('host')
+ project_id = kwargs.pop('project_id')
+ LOG.debug(_("network allocations for instance %s"), instance_id)
+
+ requested_networks = kwargs.get('requested_networks')
+
+ if requested_networks:
+ net_proj_pairs = [(net_id, project_id) \
+ for (net_id, _i) in requested_networks]
+ else:
+ net_proj_pairs = self.ipam.get_project_and_global_net_ids(context,
+ project_id)
+
+ # Create a port via quantum and attach the vif
+ for (quantum_net_id, project_id) in net_proj_pairs:
+
+ # FIXME(danwent): We'd like to have the manager be
+ # completely decoupled from the nova networks table.
+ # However, other parts of nova sometimes go behind our
+ # back and access network data directly from the DB. So
+ # for now, the quantum manager knows that there is a nova
+ # networks DB table and accesses it here. updating the
+ # virtual_interfaces table to use UUIDs would be one
+ # solution, but this would require significant work
+ # elsewhere.
+ admin_context = context.elevated()
+ network_ref = db.network_get_by_uuid(admin_context,
+ quantum_net_id)
+
+ vif_rec = manager.FlatManager.add_virtual_interface(self,
+ context, instance_id, network_ref['id'])
+
+ # talk to Quantum API to create and attach port.
+ q_tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ self.q_conn.create_and_attach_port(q_tenant_id, quantum_net_id,
+ vif_rec['uuid'])
+ self.ipam.allocate_fixed_ip(context, project_id, quantum_net_id,
+ vif_rec)
+
+ return self.get_instance_nw_info(context, instance_id,
+ instance_type_id, host)
+
+ def get_instance_nw_info(self, context, instance_id,
+ instance_type_id, host):
+ """This method is used by compute to fetch all network data
+ that should be used when creating the VM.
+
+ The method simply loops through all virtual interfaces
+ stored in the nova DB and queries the IPAM lib to get
+ the associated IP data.
+
+ The format of returned data is 'defined' by the initial
+ set of NetworkManagers found in nova/network/manager.py .
+ Ideally this 'interface' will be more formally defined
+ in the future.
+ """
+ network_info = []
+ instance = db.instance_get(context, instance_id)
+ project_id = instance.project_id
+
+ admin_context = context.elevated()
+ vifs = db.virtual_interface_get_by_instance(admin_context,
+ instance_id)
+ for vif in vifs:
+ q_tenant_id = project_id
+ ipam_tenant_id = project_id
+ net_id, port_id = self.q_conn.get_port_by_attachment(q_tenant_id,
+ vif['uuid'])
+ if not net_id:
+ q_tenant_id = FLAGS.quantum_default_tenant_id
+ ipam_tenant_id = None
+ net_id, port_id = self.q_conn.get_port_by_attachment(
+ q_tenant_id, vif['uuid'])
+ if not net_id:
+ # TODO(bgh): We need to figure out a way to tell if we
+ # should actually be raising this exception or not.
+ # In the case that a VM spawn failed it may not have
+ # attached the vif and raising the exception here
+ # prevents deletion of the VM. In that case we should
+ # probably just log, continue, and move on.
+ raise Exception(_("No network for for virtual interface %s") %
+ vif['uuid'])
+ (v4_subnet, v6_subnet) = self.ipam.get_subnets_by_net_id(context,
+ ipam_tenant_id, net_id)
+ v4_ips = self.ipam.get_v4_ips_by_interface(context,
+ net_id, vif['uuid'],
+ project_id=ipam_tenant_id)
+ v6_ips = self.ipam.get_v6_ips_by_interface(context,
+ net_id, vif['uuid'],
+ project_id=ipam_tenant_id)
+
+ quantum_net_id = v4_subnet['network_id'] or v6_subnet['network_id']
+
+ def ip_dict(ip, subnet):
+ return {
+ "ip": ip,
+ "netmask": subnet["netmask"],
+ "enabled": "1"}
+
+ network_dict = {
+ 'cidr': v4_subnet['cidr'],
+ 'injected': True,
+ 'multi_host': False}
+
+ info = {
+ 'gateway': v4_subnet['gateway'],
+ 'dhcp_server': v4_subnet['gateway'],
+ 'broadcast': v4_subnet['broadcast'],
+ 'mac': vif['address'],
+ 'vif_uuid': vif['uuid'],
+ 'dns': [],
+ 'ips': [ip_dict(ip, v4_subnet) for ip in v4_ips]}
+
+ if v6_subnet:
+ if v6_subnet['cidr']:
+ network_dict['cidr_v6'] = v6_subnet['cidr']
+ info['ip6s'] = [ip_dict(ip, v6_subnet) for ip in v6_ips]
+
+ if v6_subnet['gateway']:
+ info['gateway6'] = v6_subnet['gateway']
+
+ dns_dict = {}
+ for s in [v4_subnet, v6_subnet]:
+ for k in ['dns1', 'dns2']:
+ if s and s[k]:
+ dns_dict[s[k]] = None
+ info['dns'] = [d for d in dns_dict.keys()]
+
+ network_info.append((network_dict, info))
+ return network_info
+
+ def deallocate_for_instance(self, context, **kwargs):
+ """Called when a VM is terminated. Loop through each virtual
+ interface in the Nova DB and remove the Quantum port and
+ clear the IP allocation using the IPAM. Finally, remove
+ the virtual interfaces from the Nova DB.
+ """
+ instance_id = kwargs.get('instance_id')
+ project_id = kwargs.pop('project_id', None)
+
+ admin_context = context.elevated()
+ vifs = db.virtual_interface_get_by_instance(admin_context,
+ instance_id)
+ for vif_ref in vifs:
+ interface_id = vif_ref['uuid']
+ q_tenant_id = project_id
+ ipam_tenant_id = project_id
+ (net_id, port_id) = self.q_conn.get_port_by_attachment(q_tenant_id,
+ interface_id)
+ if not net_id:
+ q_tenant_id = FLAGS.quantum_default_tenant_id
+ ipam_tenant_id = None
+ (net_id, port_id) = self.q_conn.get_port_by_attachment(
+ q_tenant_id, interface_id)
+ if not net_id:
+ LOG.error("Unable to find port with attachment: %s" %
+ (interface_id))
+ continue
+ self.q_conn.detach_and_delete_port(q_tenant_id,
+ net_id, port_id)
+
+ self.ipam.deallocate_ips_by_vif(context, ipam_tenant_id,
+ net_id, vif_ref)
+
+ try:
+ db.virtual_interface_delete_by_instance(admin_context,
+ instance_id)
+ except exception.InstanceNotFound:
+ LOG.error(_("Attempted to deallocate non-existent instance: %s" %
+ (instance_id)))
+
+ def validate_networks(self, context, networks):
+ """Validates that this tenant has quantum networks with the associated
+ UUIDs. This is called by the 'os-create-server-ext' API extension
+ code so that we can return an API error code to the caller if they
+ request an invalid network.
+ """
+ if networks is None:
+ return
+
+ project_id = context.project_id
+ for (net_id, _i) in networks:
+ self.ipam.verify_subnet_exists(context, project_id, net_id)
+ if not self.q_conn.network_exists(project_id, net_id):
+ raise exception.NetworkNotFound(network_id=net_id)
diff --git a/nova/network/quantum/melange_connection.py b/nova/network/quantum/melange_connection.py
new file mode 100644
index 000000000..71ac9b5f1
--- /dev/null
+++ b/nova/network/quantum/melange_connection.py
@@ -0,0 +1,141 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2010 OpenStack LLC.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import httplib
+import socket
+import urllib
+import json
+
+from nova import flags
+
+
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('melange_host',
+ '127.0.0.1',
+ 'HOST for connecting to melange')
+
+flags.DEFINE_string('melange_port',
+ '9898',
+ 'PORT for connecting to melange')
+
+json_content_type = {'Content-type': "application/json"}
+
+
+# FIXME(danwent): talk to the Melange folks about creating a
+# client lib that we can import as a library, instead of
+# have to have all of the client code in here.
+class MelangeConnection(object):
+
+ def __init__(self, host=None, port=None, use_ssl=False):
+ if host is None:
+ host = FLAGS.melange_host
+ if port is None:
+ port = int(FLAGS.melange_port)
+ self.host = host
+ self.port = port
+ self.use_ssl = use_ssl
+ self.version = "v0.1"
+
+ def get(self, path, params=None, headers=None):
+ return self.do_request("GET", path, params=params, headers=headers)
+
+ def post(self, path, body=None, headers=None):
+ return self.do_request("POST", path, body=body, headers=headers)
+
+ def delete(self, path, headers=None):
+ return self.do_request("DELETE", path, headers=headers)
+
+ def _get_connection(self):
+ if self.use_ssl:
+ return httplib.HTTPSConnection(self.host, self.port)
+ else:
+ return httplib.HTTPConnection(self.host, self.port)
+
+ def do_request(self, method, path, body=None, headers=None, params=None):
+ headers = headers or {}
+ params = params or {}
+
+ url = "/%s/%s.json" % (self.version, path)
+ if params:
+ url += "?%s" % urllib.urlencode(params)
+ try:
+ connection = self._get_connection()
+ connection.request(method, url, body, headers)
+ response = connection.getresponse()
+ response_str = response.read()
+ if response.status < 400:
+ return response_str
+ raise Exception(_("Server returned error: %s" % response_str))
+ except (socket.error, IOError), e:
+ raise Exception(_("Unable to connect to "
+ "server. Got error: %s" % e))
+
+ def allocate_ip(self, network_id, vif_id,
+ project_id=None, mac_address=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+ request_body = (json.dumps(dict(network=dict(mac_address=mac_address,
+ tenant_id=project_id)))
+ if mac_address else None)
+ url = ("ipam%(tenant_scope)s/networks/%(network_id)s/"
+ "interfaces/%(vif_id)s/ip_allocations" % locals())
+ response = self.post(url, body=request_body,
+ headers=json_content_type)
+ return json.loads(response)['ip_addresses']
+
+ def create_block(self, network_id, cidr,
+ project_id=None, dns1=None, dns2=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+
+ url = "ipam%(tenant_scope)s/ip_blocks" % locals()
+
+ req_params = dict(ip_block=dict(cidr=cidr, network_id=network_id,
+ type='private', dns1=dns1, dns2=dns2))
+ self.post(url, body=json.dumps(req_params),
+ headers=json_content_type)
+
+ def delete_block(self, block_id, project_id=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+
+ url = "ipam%(tenant_scope)s/ip_blocks/%(block_id)s" % locals()
+
+ self.delete(url, headers=json_content_type)
+
+ def get_blocks(self, project_id=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+
+ url = "ipam%(tenant_scope)s/ip_blocks" % locals()
+
+ response = self.get(url, headers=json_content_type)
+ return json.loads(response)
+
+ def get_allocated_ips(self, network_id, vif_id, project_id=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+
+ url = ("ipam%(tenant_scope)s/networks/%(network_id)s/"
+ "interfaces/%(vif_id)s/ip_allocations" % locals())
+
+ response = self.get(url, headers=json_content_type)
+ return json.loads(response)['ip_addresses']
+
+ def deallocate_ips(self, network_id, vif_id, project_id=None):
+ tenant_scope = "/tenants/%s" % project_id if project_id else ""
+
+ url = ("ipam%(tenant_scope)s/networks/%(network_id)s/"
+ "interfaces/%(vif_id)s/ip_allocations" % locals())
+
+ self.delete(url, headers=json_content_type)
diff --git a/nova/network/quantum/melange_ipam_lib.py b/nova/network/quantum/melange_ipam_lib.py
new file mode 100644
index 000000000..a0ac10fd3
--- /dev/null
+++ b/nova/network/quantum/melange_ipam_lib.py
@@ -0,0 +1,205 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira Networks, Inc
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from netaddr import IPNetwork
+
+from nova import db
+from nova import exception
+from nova import flags
+from nova import log as logging
+from nova.network.quantum import melange_connection
+
+
+LOG = logging.getLogger("nova.network.quantum.melange_ipam_lib")
+
+FLAGS = flags.FLAGS
+
+
+def get_ipam_lib(net_man):
+ return QuantumMelangeIPAMLib()
+
+
+class QuantumMelangeIPAMLib(object):
+ """Implements Quantum IP Address Management (IPAM) interface
+ using the Melange service, which is access using the Melange
+ web services API.
+ """
+
+ def __init__(self):
+ """Initialize class used to connect to Melange server"""
+ self.m_conn = melange_connection.MelangeConnection()
+
+ def create_subnet(self, context, label, project_id,
+ quantum_net_id, priority, cidr=None,
+ gateway_v6=None, cidr_v6=None,
+ dns1=None, dns2=None):
+ """Contact Melange and create a subnet for any non-NULL
+ IPv4 or IPv6 subnets.
+
+ Also create a entry in the Nova networks DB, but only
+ to store values not represented in Melange or to
+ temporarily provide compatibility with Nova code that
+ accesses IPAM data directly via the DB (e.g., nova-api)
+ """
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ if cidr:
+ self.m_conn.create_block(quantum_net_id, cidr,
+ project_id=tenant_id,
+ dns1=dns1, dns2=dns2)
+ if cidr_v6:
+ self.m_conn.create_block(quantum_net_id, cidr_v6,
+ project_id=tenant_id,
+ dns1=dns1, dns2=dns2)
+
+ net = {"uuid": quantum_net_id,
+ "project_id": project_id,
+ "priority": priority,
+ "label": label}
+ admin_context = context.elevated()
+ network = db.network_create_safe(admin_context, net)
+
+ def allocate_fixed_ip(self, context, project_id, quantum_net_id, vif_ref):
+ """Pass call to allocate fixed IP on to Melange"""
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ self.m_conn.allocate_ip(quantum_net_id,
+ vif_ref['uuid'], project_id=tenant_id,
+ mac_address=vif_ref['address'])
+
+ def get_network_id_by_cidr(self, context, cidr, project_id):
+ """Find the Quantum UUID associated with a IPv4 CIDR
+ address for the specified tenant.
+ """
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ all_blocks = self.m_conn.get_blocks(tenant_id)
+ for b in all_blocks['ip_blocks']:
+ if b['cidr'] == cidr:
+ return b['network_id']
+ raise exception.NotFound(_("No network found for cidr %s" % cidr))
+
+ def delete_subnets_by_net_id(self, context, net_id, project_id):
+ """Find Melange block associated with the Quantum UUID,
+ then tell Melange to delete that block.
+ """
+ admin_context = context.elevated()
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ all_blocks = self.m_conn.get_blocks(tenant_id)
+ for b in all_blocks['ip_blocks']:
+ if b['network_id'] == net_id:
+ self.m_conn.delete_block(b['id'], tenant_id)
+
+ network = db.network_get_by_uuid(admin_context, net_id)
+ db.network_delete_safe(context, network['id'])
+
+ def get_project_and_global_net_ids(self, context, project_id):
+ """Fetches all networks associated with this project, or
+ that are "global" (i.e., have no project set).
+ Returns list sorted by 'priority' (lowest integer value
+ is highest priority).
+ """
+ if project_id is None:
+ raise Exception(_("get_project_and_global_net_ids must be called"
+ " with a non-null project_id"))
+
+ admin_context = context.elevated()
+
+ # Decorate with priority
+ priority_nets = []
+ for tenant_id in (project_id, FLAGS.quantum_default_tenant_id):
+ blocks = self.m_conn.get_blocks(tenant_id)
+ for ip_block in blocks['ip_blocks']:
+ network_id = ip_block['network_id']
+ network = db.network_get_by_uuid(admin_context, network_id)
+ if network:
+ priority = network['priority']
+ priority_nets.append((priority, network_id, tenant_id))
+
+ # Sort by priority
+ priority_nets.sort()
+
+ # Undecorate
+ return [(network_id, tenant_id)
+ for priority, network_id, tenant_id in priority_nets]
+
+ def get_subnets_by_net_id(self, context, project_id, net_id):
+ """Returns information about the IPv4 and IPv6 subnets
+ associated with a Quantum Network UUID.
+ """
+
+ # FIXME(danwent): Melange actually returns the subnet info
+ # when we query for a particular interface. We may want to
+ # rework the ipam_manager python API to let us take advantage of
+ # this, as right now we have to get all blocks and cycle through
+ # them.
+ subnet_v4 = None
+ subnet_v6 = None
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ all_blocks = self.m_conn.get_blocks(tenant_id)
+ for b in all_blocks['ip_blocks']:
+ if b['network_id'] == net_id:
+ subnet = {'network_id': b['network_id'],
+ 'cidr': b['cidr'],
+ 'gateway': b['gateway'],
+ 'broadcast': b['broadcast'],
+ 'netmask': b['netmask'],
+ 'dns1': b['dns1'],
+ 'dns2': b['dns2']}
+
+ if IPNetwork(b['cidr']).version == 6:
+ subnet_v6 = subnet
+ else:
+ subnet_v4 = subnet
+ return (subnet_v4, subnet_v6)
+
+ def get_v4_ips_by_interface(self, context, net_id, vif_id, project_id):
+ """Returns a list of IPv4 address strings associated with
+ the specified virtual interface.
+ """
+ return self._get_ips_by_interface(context, net_id, vif_id,
+ project_id, 4)
+
+ def get_v6_ips_by_interface(self, context, net_id, vif_id, project_id):
+ """Returns a list of IPv6 address strings associated with
+ the specified virtual interface.
+ """
+ return self._get_ips_by_interface(context, net_id, vif_id,
+ project_id, 6)
+
+ def _get_ips_by_interface(self, context, net_id, vif_id, project_id,
+ ip_version):
+ """Helper method to fetch v4 or v6 addresses for a particular
+ virtual interface.
+ """
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ ip_list = self.m_conn.get_allocated_ips(net_id, vif_id, tenant_id)
+ return [ip['address'] for ip in ip_list
+ if IPNetwork(ip['address']).version == ip_version]
+
+ def verify_subnet_exists(self, context, project_id, quantum_net_id):
+ """Confirms that a subnet exists that is associated with the
+ specified Quantum Network UUID.
+ """
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ v4_subnet, v6_subnet = self.get_subnets_by_net_id(context, tenant_id,
+ quantum_net_id)
+ return v4_subnet is not None
+
+ def deallocate_ips_by_vif(self, context, project_id, net_id, vif_ref):
+ """Deallocate all fixed IPs associated with the specified
+ virtual interface.
+ """
+ tenant_id = project_id or FLAGS.quantum_default_tenant_id
+ self.m_conn.deallocate_ips(net_id, vif_ref['uuid'], tenant_id)
diff --git a/nova/network/quantum/nova_ipam_lib.py b/nova/network/quantum/nova_ipam_lib.py
new file mode 100644
index 000000000..21dee8f6a
--- /dev/null
+++ b/nova/network/quantum/nova_ipam_lib.py
@@ -0,0 +1,195 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira Networks, Inc
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import netaddr
+
+from nova import db
+from nova import exception
+from nova import flags
+from nova import ipv6
+from nova import log as logging
+from nova.network import manager
+from nova.network.quantum import melange_connection as melange
+from nova import utils
+
+
+LOG = logging.getLogger("nova.network.quantum.nova_ipam_lib")
+
+FLAGS = flags.FLAGS
+
+
+def get_ipam_lib(net_man):
+ return QuantumNovaIPAMLib(net_man)
+
+
+class QuantumNovaIPAMLib(object):
+ """Implements Quantum IP Address Management (IPAM) interface
+ using the local Nova database. This implementation is inline
+ with how IPAM is used by other NetworkManagers.
+ """
+
+ def __init__(self, net_manager):
+ """Holds a reference to the "parent" network manager, used
+ to take advantage of various FlatManager methods to avoid
+ code duplication.
+ """
+ self.net_manager = net_manager
+
+ def create_subnet(self, context, label, tenant_id,
+ quantum_net_id, priority, cidr=None,
+ gateway_v6=None, cidr_v6=None,
+ dns1=None, dns2=None):
+ """Re-use the basic FlatManager create_networks method to
+ initialize the networks and fixed_ips tables in Nova DB.
+
+ Also stores a few more fields in the networks table that
+ are needed by Quantum but not the FlatManager.
+ """
+ admin_context = context.elevated()
+ subnet_size = len(netaddr.IPNetwork(cidr))
+ networks = manager.FlatManager.create_networks(self.net_manager,
+ admin_context, label, cidr,
+ False, 1, subnet_size, cidr_v6,
+ gateway_v6, quantum_net_id, None, dns1, dns2)
+
+ if len(networks) != 1:
+ raise Exception(_("Error creating network entry"))
+
+ network = networks[0]
+ net = {"project_id": tenant_id,
+ "priority": priority,
+ "uuid": quantum_net_id}
+ db.network_update(admin_context, network['id'], net)
+
+ def get_network_id_by_cidr(self, context, cidr, project_id):
+ """ Grabs Quantum network UUID based on IPv4 CIDR. """
+ admin_context = context.elevated()
+ network = db.network_get_by_cidr(admin_context, cidr)
+ if not network:
+ raise Exception(_("No network with fixed_range = %s" %
+ fixed_range))
+ return network['uuid']
+
+ def delete_subnets_by_net_id(self, context, net_id, project_id):
+ """Deletes a network based on Quantum UUID. Uses FlatManager
+ delete_network to avoid duplication.
+ """
+ admin_context = context.elevated()
+ network = db.network_get_by_uuid(admin_context, net_id)
+ if not network:
+ raise Exception(_("No network with net_id = %s" % net_id))
+ manager.FlatManager.delete_network(self.net_manager,
+ admin_context, network['cidr'],
+ require_disassociated=False)
+
+ def get_project_and_global_net_ids(self, context, project_id):
+ """Fetches all networks associated with this project, or
+ that are "global" (i.e., have no project set).
+ Returns list sorted by 'priority'.
+ """
+ admin_context = context.elevated()
+ networks = db.project_get_networks(admin_context, project_id, False)
+ networks.extend(db.project_get_networks(admin_context, None, False))
+ id_priority_map = {}
+ net_list = []
+ for n in networks:
+ net_id = n['uuid']
+ net_list.append((net_id, n["project_id"]))
+ id_priority_map[net_id] = n['priority']
+ return sorted(net_list, key=lambda x: id_priority_map[x[0]])
+
+ def allocate_fixed_ip(self, context, tenant_id, quantum_net_id, vif_rec):
+ """Allocates a single fixed IPv4 address for a virtual interface."""
+ admin_context = context.elevated()
+ network = db.network_get_by_uuid(admin_context, quantum_net_id)
+ if network['cidr']:
+ address = db.fixed_ip_associate_pool(admin_context,
+ network['id'],
+ vif_rec['instance_id'])
+ values = {'allocated': True,
+ 'virtual_interface_id': vif_rec['id']}
+ db.fixed_ip_update(admin_context, address, values)
+
+ def get_subnets_by_net_id(self, context, tenant_id, net_id):
+ """Returns information about the IPv4 and IPv6 subnets
+ associated with a Quantum Network UUID.
+ """
+ n = db.network_get_by_uuid(context.elevated(), net_id)
+ subnet_data_v4 = {
+ 'network_id': n['uuid'],
+ 'cidr': n['cidr'],
+ 'gateway': n['gateway'],
+ 'broadcast': n['broadcast'],
+ 'netmask': n['netmask'],
+ 'dns1': n['dns1'],
+ 'dns2': n['dns2']}
+ subnet_data_v6 = {
+ 'network_id': n['uuid'],
+ 'cidr': n['cidr_v6'],
+ 'gateway': n['gateway_v6'],
+ 'broadcast': None,
+ 'netmask': None,
+ 'dns1': None,
+ 'dns2': None}
+ return (subnet_data_v4, subnet_data_v6)
+
+ def get_v4_ips_by_interface(self, context, net_id, vif_id, project_id):
+ """Returns a list of IPv4 address strings associated with
+ the specified virtual interface, based on the fixed_ips table.
+ """
+ vif_rec = db.virtual_interface_get_by_uuid(context, vif_id)
+ fixed_ips = db.fixed_ip_get_by_virtual_interface(context,
+ vif_rec['id'])
+ return [fixed_ip['address'] for fixed_ip in fixed_ips]
+
+ def get_v6_ips_by_interface(self, context, net_id, vif_id, project_id):
+ """Returns a list containing a single IPv6 address strings
+ associated with the specified virtual interface.
+ """
+ admin_context = context.elevated()
+ network = db.network_get_by_uuid(admin_context, net_id)
+ vif_rec = db.virtual_interface_get_by_uuid(context, vif_id)
+ if network['cidr_v6']:
+ ip = ipv6.to_global(network['cidr_v6'],
+ vif_rec['address'],
+ project_id)
+ return [ip]
+ return []
+
+ def verify_subnet_exists(self, context, tenant_id, quantum_net_id):
+ """Confirms that a subnet exists that is associated with the
+ specified Quantum Network UUID. Raises an exception if no
+ such subnet exists.
+ """
+ admin_context = context.elevated()
+ db.network_get_by_uuid(admin_context, quantum_net_id)
+
+ def deallocate_ips_by_vif(self, context, tenant_id, net_id, vif_ref):
+ """Deallocate all fixed IPs associated with the specified
+ virtual interface.
+ """
+ try:
+ admin_context = context.elevated()
+ fixed_ips = db.fixed_ip_get_by_virtual_interface(admin_context,
+ vif_ref['id'])
+ for fixed_ip in fixed_ips:
+ db.fixed_ip_update(admin_context, fixed_ip['address'],
+ {'allocated': False,
+ 'virtual_interface_id': None})
+ except exception.FixedIpNotFoundForInstance:
+ LOG.error(_('No fixed IPs to deallocate for vif %s' %
+ vif_ref['id']))
diff --git a/nova/network/quantum/quantum_connection.py b/nova/network/quantum/quantum_connection.py
new file mode 100644
index 000000000..21917653c
--- /dev/null
+++ b/nova/network/quantum/quantum_connection.py
@@ -0,0 +1,118 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira Networks
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import flags
+from nova import log as logging
+from nova.network.quantum import client as quantum_client
+from nova import utils
+
+
+LOG = logging.getLogger("nova.network.quantum.quantum_connection")
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('quantum_connection_host',
+ '127.0.0.1',
+ 'HOST for connecting to quantum')
+
+flags.DEFINE_string('quantum_connection_port',
+ '9696',
+ 'PORT for connecting to quantum')
+
+flags.DEFINE_string('quantum_default_tenant_id',
+ "default",
+ 'Default tenant id when creating quantum networks')
+
+
+class QuantumClientConnection(object):
+ """Abstracts connection to Quantum service into higher level
+ operations performed by the QuantumManager.
+
+ Separating this out as a class also let's us create a 'fake'
+ version of this class for unit tests.
+ """
+
+ def __init__(self):
+ """Initialize Quantum client class based on flags."""
+ self.client = quantum_client.Client(FLAGS.quantum_connection_host,
+ FLAGS.quantum_connection_port,
+ format="json",
+ logger=LOG)
+
+ def create_network(self, tenant_id, network_name):
+ """Create network using specified name, return Quantum
+ network UUID.
+ """
+ data = {'network': {'name': network_name}}
+ resdict = self.client.create_network(data, tenant=tenant_id)
+ return resdict["network"]["id"]
+
+ def delete_network(self, tenant_id, net_id):
+ """Deletes Quantum network with specified UUID."""
+ self.client.delete_network(net_id, tenant=tenant_id)
+
+ def network_exists(self, tenant_id, net_id):
+ """Determine if a Quantum network exists for the
+ specified tenant.
+ """
+ try:
+ self.client.show_network_details(net_id, tenant=tenant_id)
+ return True
+ except client.QuantumNotFoundException:
+ # Not really an error. Real errors will be propogated to caller
+ return False
+
+ def create_and_attach_port(self, tenant_id, net_id, interface_id):
+ """Creates a Quantum port on the specified network, sets
+ status to ACTIVE to enable traffic, and attaches the
+ vNIC with the specified interface-id.
+ """
+ LOG.debug(_("Connecting interface %(interface_id)s to "
+ "net %(net_id)s for %(tenant_id)s" % locals()))
+ port_data = {'port': {'state': 'ACTIVE'}}
+ resdict = self.client.create_port(net_id, port_data, tenant=tenant_id)
+ port_id = resdict["port"]["id"]
+
+ attach_data = {'attachment': {'id': interface_id}}
+ self.client.attach_resource(net_id, port_id, attach_data,
+ tenant=tenant_id)
+
+ def detach_and_delete_port(self, tenant_id, net_id, port_id):
+ """Detach and delete the specified Quantum port."""
+ LOG.debug(_("Deleting port %(port_id)s on net %(net_id)s"
+ " for %(tenant_id)s" % locals()))
+
+ self.client.detach_resource(net_id, port_id, tenant=tenant_id)
+ self.client.delete_port(net_id, port_id, tenant=tenant_id)
+
+ def get_port_by_attachment(self, tenant_id, attachment_id):
+ """Given a tenant, search for the Quantum network and port
+ UUID that has the specified interface-id attachment.
+ """
+ # FIXME(danwent): this will be inefficient until the Quantum
+ # API implements querying a port by the interface-id
+ net_list_resdict = self.client.list_networks(tenant=tenant_id)
+ for n in net_list_resdict["networks"]:
+ net_id = n['id']
+ port_list_resdict = self.client.list_ports(net_id,
+ tenant=tenant_id)
+ for p in port_list_resdict["ports"]:
+ port_id = p["id"]
+ port_get_resdict = self.client.show_port_attachment(net_id,
+ port_id, tenant=tenant_id)
+ if attachment_id == port_get_resdict["attachment"]["id"]:
+ return (net_id, port_id)
+ return (None, None)
diff --git a/nova/scheduler/base_scheduler.py b/nova/scheduler/base_scheduler.py
index 35e5af035..e9c078b81 100644
--- a/nova/scheduler/base_scheduler.py
+++ b/nova/scheduler/base_scheduler.py
@@ -55,5 +55,17 @@ class BaseScheduler(abstract_scheduler.AbstractScheduler):
scheduling objectives
"""
# NOTE(sirp): The default logic is the same as the NoopCostFunction
- return [dict(weight=1, hostname=hostname, capabilities=capabilities)
- for hostname, capabilities in hosts]
+ hosts = [dict(weight=1, hostname=hostname, capabilities=capabilities)
+ for hostname, capabilities in hosts]
+
+ # NOTE(Vek): What we actually need to return is enough hosts
+ # for all the instances!
+ num_instances = request_spec.get('num_instances', 1)
+ instances = []
+ while num_instances > len(hosts):
+ instances.extend(hosts)
+ num_instances -= len(hosts)
+ if num_instances > 0:
+ instances.extend(hosts[:num_instances])
+
+ return instances
diff --git a/nova/tests/scheduler/test_abstract_scheduler.py b/nova/tests/scheduler/test_abstract_scheduler.py
index aa97e2344..9bf128b13 100644
--- a/nova/tests/scheduler/test_abstract_scheduler.py
+++ b/nova/tests/scheduler/test_abstract_scheduler.py
@@ -26,6 +26,7 @@ from nova import test
from nova.compute import api as compute_api
from nova.scheduler import driver
from nova.scheduler import abstract_scheduler
+from nova.scheduler import base_scheduler
from nova.scheduler import zone_manager
@@ -65,6 +66,11 @@ class FakeAbstractScheduler(abstract_scheduler.AbstractScheduler):
pass
+class FakeBaseScheduler(base_scheduler.BaseScheduler):
+ # No need to stub anything at the moment
+ pass
+
+
class FakeZoneManager(zone_manager.ZoneManager):
def __init__(self):
self.service_states = {
@@ -365,3 +371,30 @@ class AbstractSchedulerTestCase(test.TestCase):
self.assertEqual(fixture._decrypt_blob(test_data),
json.dumps(test_data))
+
+
+class BaseSchedulerTestCase(test.TestCase):
+ """Test case for Base Scheduler."""
+
+ def test_weigh_hosts(self):
+ """
+ Try to weigh a short list of hosts and make sure enough
+ entries for a larger number instances are returned.
+ """
+
+ sched = FakeBaseScheduler()
+
+ # Fake out a list of hosts
+ zm = FakeZoneManager()
+ hostlist = [(host, services['compute'])
+ for host, services in zm.service_states.items()
+ if 'compute' in services]
+
+ # Call weigh_hosts()
+ num_instances = len(hostlist) * 2 + len(hostlist) / 2
+ instlist = sched.weigh_hosts('compute',
+ dict(num_instances=num_instances),
+ hostlist)
+
+ # Should be enough entries to cover all instances
+ self.assertEqual(len(instlist), num_instances)
diff --git a/nova/tests/test_network.py b/nova/tests/test_network.py
index 25ff940f0..2ae5a35e3 100644
--- a/nova/tests/test_network.py
+++ b/nova/tests/test_network.py
@@ -118,9 +118,14 @@ vifs = [{'id': 0,
{'id': 1,
'address': 'DE:AD:BE:EF:00:01',
'uuid': '00000000-0000-0000-0000-0000000000000001',
- 'network_id': 0,
'network_id': 1,
'network': FakeModel(**networks[1]),
+ 'instance_id': 0},
+ {'id': 2,
+ 'address': 'DE:AD:BE:EF:00:02',
+ 'uuid': '00000000-0000-0000-0000-0000000000000002',
+ 'network_id': 2,
+ 'network': None,
'instance_id': 0}]
diff --git a/nova/tests/test_quantum.py b/nova/tests/test_quantum.py
new file mode 100644
index 000000000..0feec9b99
--- /dev/null
+++ b/nova/tests/test_quantum.py
@@ -0,0 +1,323 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 Nicira, Inc.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import context
+from nova import db
+from nova.db.sqlalchemy import models
+from nova.db.sqlalchemy.session import get_session
+from nova import exception
+from nova import ipv6
+from nova import log as logging
+from nova.network.quantum import manager as quantum_manager
+from nova import test
+from nova import utils
+
+LOG = logging.getLogger('nova.tests.quantum_network')
+
+
+# this class can be used for unit functional/testing on nova,
+# as it does not actually make remote calls to the Quantum service
+class FakeQuantumClientConnection(object):
+
+ def __init__(self):
+ self.nets = {}
+
+ def get_networks_for_tenant(self, tenant_id):
+ net_ids = []
+ for net_id, n in self.nets.items():
+ if n['tenant-id'] == tenant_id:
+ net_ids.append(net_id)
+ return net_ids
+
+ def create_network(self, tenant_id, network_name):
+
+ uuid = str(utils.gen_uuid())
+ self.nets[uuid] = {'net-name': network_name,
+ 'tenant-id': tenant_id,
+ 'ports': {}}
+ return uuid
+
+ def delete_network(self, tenant_id, net_id):
+ if self.nets[net_id]['tenant-id'] == tenant_id:
+ del self.nets[net_id]
+
+ def network_exists(self, tenant_id, net_id):
+ try:
+ return self.nets[net_id]['tenant-id'] == tenant_id
+ except KeyError:
+ return False
+
+ def _confirm_not_attached(self, interface_id):
+ for n in self.nets.values():
+ for p in n['ports'].values():
+ if p['attachment-id'] == interface_id:
+ raise Exception(_("interface '%s' is already attached" %
+ interface_id))
+
+ def create_and_attach_port(self, tenant_id, net_id, interface_id):
+ if not self.network_exists(tenant_id, net_id):
+ raise Exception(
+ _("network %(net_id)s does not exist for tenant %(tenant_id)"
+ % locals()))
+
+ self._confirm_not_attached(interface_id)
+ uuid = str(utils.gen_uuid())
+ self.nets[net_id]['ports'][uuid] = \
+ {"port-state": "ACTIVE",
+ "attachment-id": interface_id}
+
+ def detach_and_delete_port(self, tenant_id, net_id, port_id):
+ if not self.network_exists(tenant_id, net_id):
+ raise exception.NotFound(
+ _("network %(net_id)s does not exist "
+ "for tenant %(tenant_id)s" % locals()))
+ del self.nets[net_id]['ports'][port_id]
+
+ def get_port_by_attachment(self, tenant_id, attachment_id):
+ for net_id, n in self.nets.items():
+ if n['tenant-id'] == tenant_id:
+ for port_id, p in n['ports'].items():
+ if p['attachment-id'] == attachment_id:
+ return (net_id, port_id)
+
+ return (None, None)
+
+networks = [{'label': 'project1-net1',
+ 'injected': False,
+ 'multi_host': False,
+ 'cidr': '192.168.0.0/24',
+ 'cidr_v6': '2001:1db8::/64',
+ 'gateway_v6': '2001:1db8::1',
+ 'netmask_v6': '64',
+ 'netmask': '255.255.255.0',
+ 'bridge': None,
+ 'bridge_interface': None,
+ 'gateway': '192.168.0.1',
+ 'broadcast': '192.168.0.255',
+ 'dns1': '192.168.0.1',
+ 'dns2': '192.168.0.2',
+ 'vlan': None,
+ 'host': None,
+ 'vpn_public_address': None,
+ 'project_id': 'fake_project1',
+ 'priority': 1},
+ {'label': 'project2-net1',
+ 'injected': False,
+ 'multi_host': False,
+ 'cidr': '192.168.1.0/24',
+ 'cidr_v6': '2001:1db9::/64',
+ 'gateway_v6': '2001:1db9::1',
+ 'netmask_v6': '64',
+ 'netmask': '255.255.255.0',
+ 'bridge': None,
+ 'bridge_interface': None,
+ 'gateway': '192.168.1.1',
+ 'broadcast': '192.168.1.255',
+ 'dns1': '192.168.0.1',
+ 'dns2': '192.168.0.2',
+ 'vlan': None,
+ 'host': None,
+ 'project_id': 'fake_project2',
+ 'priority': 1},
+ {'label': "public",
+ 'injected': False,
+ 'multi_host': False,
+ 'cidr': '10.0.0.0/24',
+ 'cidr_v6': '2001:1dba::/64',
+ 'gateway_v6': '2001:1dba::1',
+ 'netmask_v6': '64',
+ 'netmask': '255.255.255.0',
+ 'bridge': None,
+ 'bridge_interface': None,
+ 'gateway': '10.0.0.1',
+ 'broadcast': '10.0.0.255',
+ 'dns1': '10.0.0.1',
+ 'dns2': '10.0.0.2',
+ 'vlan': None,
+ 'host': None,
+ 'project_id': None,
+ 'priority': 0},
+ {'label': "project2-net2",
+ 'injected': False,
+ 'multi_host': False,
+ 'cidr': '9.0.0.0/24',
+ 'cidr_v6': '2001:1dbb::/64',
+ 'gateway_v6': '2001:1dbb::1',
+ 'netmask_v6': '64',
+ 'netmask': '255.255.255.0',
+ 'bridge': None,
+ 'bridge_interface': None,
+ 'gateway': '9.0.0.1',
+ 'broadcast': '9.0.0.255',
+ 'dns1': '9.0.0.1',
+ 'dns2': '9.0.0.2',
+ 'vlan': None,
+ 'host': None,
+ 'project_id': "fake_project2",
+ 'priority': 2}]
+
+
+# this is a base class to be used by all other Quantum Test classes
+class QuantumTestCaseBase(object):
+
+ def test_create_and_delete_nets(self):
+ self._create_nets()
+ self._delete_nets()
+
+ def _create_nets(self):
+ for n in networks:
+ ctx = context.RequestContext('user1', n['project_id'])
+ self.net_man.create_networks(ctx,
+ label=n['label'], cidr=n['cidr'],
+ multi_host=n['multi_host'],
+ num_networks=1, network_size=256, cidr_v6=n['cidr_v6'],
+ gateway_v6=n['gateway_v6'], bridge=None,
+ bridge_interface=None, dns1=n['dns1'],
+ dns2=n['dns2'], project_id=n['project_id'],
+ priority=n['priority'])
+
+ def _delete_nets(self):
+ for n in networks:
+ ctx = context.RequestContext('user1', n['project_id'])
+ self.net_man.delete_network(ctx, n['cidr'])
+
+ def test_allocate_and_deallocate_instance_static(self):
+ self._create_nets()
+
+ project_id = "fake_project1"
+ ctx = context.RequestContext('user1', project_id)
+
+ instance_ref = db.api.instance_create(ctx,
+ {"project_id": project_id})
+ nw_info = self.net_man.allocate_for_instance(ctx,
+ instance_id=instance_ref['id'], host="",
+ instance_type_id=instance_ref['instance_type_id'],
+ project_id=project_id)
+
+ self.assertEquals(len(nw_info), 2)
+
+ # we don't know which order the NICs will be in until we
+ # introduce the notion of priority
+ # v4 cidr
+ self.assertTrue(nw_info[0][0]['cidr'].startswith("10."))
+ self.assertTrue(nw_info[1][0]['cidr'].startswith("192."))
+
+ # v4 address
+ self.assertTrue(nw_info[0][1]['ips'][0]['ip'].startswith("10."))
+ self.assertTrue(nw_info[1][1]['ips'][0]['ip'].startswith("192."))
+
+ # v6 cidr
+ self.assertTrue(nw_info[0][0]['cidr_v6'].startswith("2001:1dba:"))
+ self.assertTrue(nw_info[1][0]['cidr_v6'].startswith("2001:1db8:"))
+
+ # v6 address
+ self.assertTrue(
+ nw_info[0][1]['ip6s'][0]['ip'].startswith("2001:1dba:"))
+ self.assertTrue(
+ nw_info[1][1]['ip6s'][0]['ip'].startswith("2001:1db8:"))
+
+ self.net_man.deallocate_for_instance(ctx,
+ instance_id=instance_ref['id'],
+ project_id=project_id)
+
+ self._delete_nets()
+
+ def test_allocate_and_deallocate_instance_dynamic(self):
+ self._create_nets()
+ project_id = "fake_project2"
+ ctx = context.RequestContext('user1', project_id)
+
+ net_ids = self.net_man.q_conn.get_networks_for_tenant(project_id)
+ requested_networks = [(net_id, None) for net_id in net_ids]
+
+ self.net_man.validate_networks(ctx, requested_networks)
+
+ instance_ref = db.api.instance_create(ctx,
+ {"project_id": project_id})
+ nw_info = self.net_man.allocate_for_instance(ctx,
+ instance_id=instance_ref['id'], host="",
+ instance_type_id=instance_ref['instance_type_id'],
+ project_id=project_id,
+ requested_networks=requested_networks)
+
+ self.assertEquals(len(nw_info), 2)
+
+ # we don't know which order the NICs will be in until we
+ # introduce the notion of priority
+ # v4 cidr
+ self.assertTrue(nw_info[0][0]['cidr'].startswith("9.") or
+ nw_info[1][0]['cidr'].startswith("9."))
+ self.assertTrue(nw_info[0][0]['cidr'].startswith("192.") or
+ nw_info[1][0]['cidr'].startswith("192."))
+
+ # v4 address
+ self.assertTrue(nw_info[0][1]['ips'][0]['ip'].startswith("9.") or
+ nw_info[1][1]['ips'][0]['ip'].startswith("9."))
+ self.assertTrue(nw_info[0][1]['ips'][0]['ip'].startswith("192.") or
+ nw_info[1][1]['ips'][0]['ip'].startswith("192."))
+
+ # v6 cidr
+ self.assertTrue(nw_info[0][0]['cidr_v6'].startswith("2001:1dbb:") or
+ nw_info[1][0]['cidr_v6'].startswith("2001:1dbb:"))
+ self.assertTrue(nw_info[0][0]['cidr_v6'].startswith("2001:1db9:") or
+ nw_info[1][0]['cidr_v6'].startswith("2001:1db9:"))
+
+ # v6 address
+ self.assertTrue(
+ nw_info[0][1]['ip6s'][0]['ip'].startswith("2001:1dbb:") or
+ nw_info[1][1]['ip6s'][0]['ip'].startswith("2001:1dbb:"))
+ self.assertTrue(
+ nw_info[0][1]['ip6s'][0]['ip'].startswith("2001:1db9:") or
+ nw_info[1][1]['ip6s'][0]['ip'].startswith("2001:1db9:"))
+
+ self.net_man.deallocate_for_instance(ctx,
+ instance_id=instance_ref['id'],
+ project_id=project_id)
+
+ self._delete_nets()
+
+ def test_validate_bad_network(self):
+ ctx = context.RequestContext('user1', 'fake_project1')
+ self.assertRaises(exception.NetworkNotFound,
+ self.net_man.validate_networks, ctx, [("", None)])
+
+
+class QuantumNovaIPAMTestCase(QuantumTestCaseBase, test.TestCase):
+
+ def setUp(self):
+ super(QuantumNovaIPAMTestCase, self).setUp()
+
+ self.net_man = quantum_manager.QuantumManager(
+ ipam_lib="nova.network.quantum.nova_ipam_lib",
+ q_conn=FakeQuantumClientConnection())
+
+ # Tests seem to create some networks by default, which
+ # we don't want. So we delete them.
+
+ ctx = context.RequestContext('user1', 'fake_project1').elevated()
+ for n in db.network_get_all(ctx):
+ db.network_delete_safe(ctx, n['id'])
+
+ # Other unit tests (e.g., test_compute.py) have a nasty
+ # habit of of creating fixed IPs and not cleaning up, which
+ # can confuse these tests, so we remove all existing fixed
+ # ips before starting.
+ session = get_session()
+ result = session.query(models.FixedIp).all()
+ with session.begin():
+ for fip_ref in result:
+ session.delete(fip_ref)
diff --git a/nova/tests/test_xenapi.py b/nova/tests/test_xenapi.py
index 45dad3516..91b4161b0 100644
--- a/nova/tests/test_xenapi.py
+++ b/nova/tests/test_xenapi.py
@@ -494,6 +494,7 @@ class XenAPIVMTestCase(test.TestCase):
self.check_vm_params_for_linux_with_external_kernel()
def test_spawn_netinject_file(self):
+ self.flags(flat_injected=True)
db_fakes.stub_out_db_instance_api(self.stubs, injected=True)
self._tee_executed = False
@@ -611,7 +612,6 @@ class XenAPIVMTestCase(test.TestCase):
str(3 * 1024))
def test_rescue(self):
- self.flags(flat_injected=False)
instance = self._create_instance()
conn = xenapi_conn.get_connection(False)
conn.rescue(self.context, instance, None, [])
diff --git a/nova/virt/libvirt/vif.py b/nova/virt/libvirt/vif.py
index 0b7438011..077c32474 100644
--- a/nova/virt/libvirt/vif.py
+++ b/nova/virt/libvirt/vif.py
@@ -101,7 +101,7 @@ class LibvirtOpenVswitchDriver(VIFDriver):
"""VIF driver for Open vSwitch."""
def get_dev_name(_self, iface_id):
- return "tap-" + iface_id[0:15]
+ return "tap" + iface_id[0:11]
def plug(self, instance, network, mapping):
iface_id = mapping['vif_uuid']
diff --git a/nova/virt/xenapi/vmops.py b/nova/virt/xenapi/vmops.py
index c5f105f40..9c138ee41 100644
--- a/nova/virt/xenapi/vmops.py
+++ b/nova/virt/xenapi/vmops.py
@@ -188,9 +188,16 @@ class VMOps(object):
ramdisk = VMHelper.fetch_image(context, self._session,
instance, instance.ramdisk_id, instance.user_id,
instance.project_id, ImageType.RAMDISK)[0]
- # Create the VM ref and attach the first disk
- first_vdi_ref = self._session.call_xenapi('VDI.get_by_uuid',
- vdis[0]['vdi_uuid'])
+
+ # NOTE(jk0): Since vdi_type may contain either 'os' or 'swap', we
+ # need to ensure that the 'swap' VDI is not chosen as the mount
+ # point for file injection.
+ first_vdi_ref = None
+ for vdi in vdis:
+ if vdi.get('vdi_type') != 'swap':
+ # Create the VM ref and attach the first disk
+ first_vdi_ref = self._session.call_xenapi(
+ 'VDI.get_by_uuid', vdi['vdi_uuid'])
vm_mode = instance.vm_mode and instance.vm_mode.lower()
if vm_mode == 'pv':
href='#n4255'>4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526
/*
  Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com>
  This file is part of GlusterFS.

  GlusterFS is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 3 of the License,
  or (at your option) any later version.

  GlusterFS is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see
  <http://www.gnu.org/licenses/>.
*/

#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif

#define __XOPEN_SOURCE 500

#include <stdint.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <errno.h>
#include <libgen.h>
#include <pthread.h>
#include <ftw.h>

#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif /* GF_BSD_HOST_OS */

#include "glusterfs.h"
#include "md5.h"
#include "checksum.h"
#include "dict.h"
#include "logging.h"
#include "posix.h"
#include "xlator.h"
#include "defaults.h"
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
#include "byte-order.h"
#include "syscall.h"
#include "statedump.h"
#include "locking.h"
#include "timer.h"
#include "glusterfs3-xdr.h"

#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID

#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid;

#define SET_FS_ID(uid, gid) do {		\
                old_fsuid = setfsuid (uid);     \
                old_fsgid = setfsgid (gid);     \
        } while (0)

#define SET_TO_OLD_FS_ID() do {			\
                setfsuid (old_fsuid);           \
                setfsgid (old_fsgid);           \
        } while (0)

#else

#define DECLARE_OLD_FS_ID_VAR
#define SET_FS_ID(uid, gid)
#define SET_TO_OLD_FS_ID()

#endif

typedef struct {
  	xlator_t    *this;
  	const char  *real_path;
  	dict_t      *xattr;
  	struct iatt *stbuf;
	loc_t       *loc;
} posix_xattr_filler_t;

int
posix_forget (xlator_t *this, inode_t *inode)
{
	uint64_t tmp_cache = 0;
	if (!inode_ctx_del (inode, this, &tmp_cache))
		dict_destroy ((dict_t *)(long)tmp_cache);

	return 0;
}

static void
_posix_xattr_get_set (dict_t *xattr_req,
    		      char *key,
    		      data_t *data,
    		      void *xattrargs)
{
    	posix_xattr_filler_t *filler = xattrargs;
    	char     *value      = NULL;
    	ssize_t   xattr_size = -1;
    	int       ret      = -1;
  	char     *databuf  = NULL;
  	int       _fd      = -1;
	loc_t    *loc      = NULL;
	ssize_t  req_size  = 0;


    	/* should size be put into the data_t ? */
	if (!strcmp (key, GF_CONTENT_KEY)
            && IA_ISREG (filler->stbuf->ia_type)) {

    		/* file content request */
		req_size = data_to_uint64 (data);
		if (req_size >= filler->stbuf->ia_size) {
			_fd = open (filler->real_path, O_RDONLY);

			if (_fd == -1) {
				gf_log (filler->this->name, GF_LOG_ERROR,
					"Opening file %s failed: %s",
					filler->real_path, strerror (errno));
				goto err;
			}

			databuf = GF_CALLOC (1, filler->stbuf->ia_size,
                                             gf_posix_mt_char);

			if (!databuf) {
				gf_log (filler->this->name, GF_LOG_ERROR,
					"Out of memory.");
				goto err;
			}

			ret = read (_fd, databuf, filler->stbuf->ia_size);
			if (ret == -1) {
				gf_log (filler->this->name, GF_LOG_ERROR,
					"Read on file %s failed: %s",
					filler->real_path, strerror (errno));
				goto err;
			}

			ret = close (_fd);
			_fd = -1;
			if (ret == -1) {
				gf_log (filler->this->name, GF_LOG_ERROR,
					"Close on file %s failed: %s",
					filler->real_path, strerror (errno));
				goto err;
			}

			ret = dict_set_bin (filler->xattr, key,
					    databuf, filler->stbuf->ia_size);
			if (ret < 0) {
				goto err;
			}

			/* To avoid double free in cleanup below */
			databuf = NULL;
		err:
			if (_fd != -1)
				close (_fd);
			if (databuf)
				GF_FREE (databuf);
		}
    	} else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
		loc = filler->loc;
		if (!list_empty (&loc->inode->fd_list)) {
			ret = dict_set_uint32 (filler->xattr, key, 1);
                        if (ret < 0)
                                gf_log (filler->this->name, GF_LOG_WARNING,
                                        "Failed to set dictionary value for %s",
                                        key);
		} else {
			ret = dict_set_uint32 (filler->xattr, key, 0);
                        if (ret < 0)
                                gf_log (filler->this->name, GF_LOG_WARNING,
                                        "Failed to set dictionary value for %s",
                                        key);
		}
	} else {
		xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);

		if (xattr_size > 0) {
			value = GF_CALLOC (1, xattr_size + 1,
                                           gf_posix_mt_char);
                        if (!value)
                                return;

			sys_lgetxattr (filler->real_path, key, value,
                                       xattr_size);

			value[xattr_size] = '\0';
			ret = dict_set_bin (filler->xattr, key,
					    value, xattr_size);
			if (ret < 0)
				gf_log (filler->this->name, GF_LOG_DEBUG,
					"dict set failed. path: %s, key: %s",
					filler->real_path, key);
		}
	}
}


int
posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
{
        int ret = 0;

        if (!iatt)
                return 0;

        ret = sys_lgetxattr (path, "trusted.gfid", iatt->ia_gfid, 16);
        /* Return value of getxattr */
        if (ret == 16)
                ret = 0;

        return ret;
}


int
posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
{
        int ret = 0;

        if (!iatt)
                return 0;

        ret = sys_fgetxattr (fd, "trusted.gfid", iatt->ia_gfid, 16);
        /* Return value of getxattr */
        if (ret == 16)
                ret = 0;

        return ret;
}


int
posix_lstat_with_gfid (xlator_t *this, const char *path, struct iatt *stbuf_p)
{
        struct posix_private  *priv    = NULL;
        int                    ret     = 0;
        struct stat            lstatbuf = {0, };
        struct iatt            stbuf = {0, };

        priv = this->private;

        ret = lstat (path, &lstatbuf);
        if (ret == -1)
                return -1;

        iatt_from_stat (&stbuf, &lstatbuf);

        ret = posix_fill_gfid_path (this, path, &stbuf);
        if (ret)
                gf_log (this->name, GF_LOG_DEBUG, "failed to set gfid");

        if (stbuf_p)
                *stbuf_p = stbuf;

        return ret;
}


int
posix_fstat_with_gfid (xlator_t *this, int fd, struct iatt *stbuf_p)
{
        struct posix_private  *priv    = NULL;
        int                    ret     = 0;
        struct stat            fstatbuf = {0, };
        struct iatt            stbuf = {0, };

        priv = this->private;

        ret = fstat (fd, &fstatbuf);
        if (ret == -1)
                return -1;

        iatt_from_stat (&stbuf, &fstatbuf);

        ret = posix_fill_gfid_fd (this, fd, &stbuf);
        if (ret)
                gf_log (this->name, GF_LOG_DEBUG, "failed to set gfid");

        if (stbuf_p)
                *stbuf_p = stbuf;

        return ret;
}


dict_t *
posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
    			 dict_t *xattr_req, struct iatt *buf)
{
    	dict_t     *xattr             = NULL;
    	posix_xattr_filler_t filler   = {0, };

    	xattr = get_new_dict();
    	if (!xattr) {
    		gf_log (this->name, GF_LOG_ERROR,
    			"Out of memory.");
    		goto out;
    	}

    	filler.this      = this;
    	filler.real_path = real_path;
    	filler.xattr     = xattr;
    	filler.stbuf     = buf;
	filler.loc       = loc;

    	dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
out:
    	return xattr;
}


/*
 * If the parent directory of {real_path} has the setgid bit set,
 * then set {gid} to the gid of the parent. Otherwise,
 * leave {gid} unchanged.
 */

int
setgid_override (xlator_t *this, char *real_path, gid_t *gid)
{
        char *                 tmp_path     = NULL;
        char *                 parent_path  = NULL;
        struct iatt            parent_stbuf;

        int op_ret = 0;

        tmp_path = gf_strdup (real_path);
        if (!tmp_path) {
                op_ret = -ENOMEM;
                gf_log ("[storage/posix]", GF_LOG_ERROR,
                        "Out of memory");
                goto out;
        }

        parent_path = dirname (tmp_path);

        op_ret = posix_lstat_with_gfid (this, parent_path, &parent_stbuf);

        if (op_ret == -1) {
                op_ret = -errno;
                gf_log ("[storage/posix]", GF_LOG_ERROR,
                        "lstat on parent directory (%s) failed: %s",
                        parent_path, strerror (errno));
                goto out;
        }

        if (parent_stbuf.ia_prot.sgid) {
                /*
                   Entries created inside a setgid directory
                   should inherit the gid from the parent
                */

                *gid = parent_stbuf.ia_gid;
        }
out:

        if (tmp_path)
                GF_FREE (tmp_path);

        return op_ret;
}


int
posix_gfid_set (xlator_t *this, const char *path, dict_t *xattr_req)
{
        void        *uuid_req = NULL;
        uuid_t       uuid_curr;
        int          ret = 0;
        struct stat  stat = {0, };

        if (!xattr_req)
                return 0;

        if (sys_lstat (path, &stat) != 0)
                return 0;

        ret = sys_lgetxattr (path, "trusted.gfid", uuid_curr, 16);

        if (ret == 16)
                return 0;

        ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
        if (ret)
                goto out;

        ret = sys_lsetxattr (path, "trusted.gfid", uuid_req, 16, XATTR_CREATE);

out:
        return ret;
}


int32_t
posix_lookup (call_frame_t *frame, xlator_t *this,
              loc_t *loc, dict_t *xattr_req)
{
        struct iatt buf                = {0, };
        char *      real_path          = NULL;
        int32_t     op_ret             = -1;
        int32_t     entry_ret          = 0;
        int32_t     op_errno           = 0;
        dict_t *    xattr              = NULL;
        char *      pathdup            = NULL;
        char *      parentpath         = NULL;
        struct iatt postparent         = {0,};

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);
	VALIDATE_OR_GOTO (loc->path, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        posix_gfid_set (this, real_path, xattr_req);

        op_ret   = posix_lstat_with_gfid (this, real_path, &buf);
        op_errno = errno;

        if (op_ret == -1) {
		if (op_errno != ENOENT) {
			gf_log (this->name, GF_LOG_ERROR,
				"lstat on %s failed: %s",
				loc->path, strerror (op_errno));
		}

                entry_ret = -1;
                goto parent;
        }

        if (xattr_req && (op_ret == 0)) {
		xattr = posix_lookup_xattr_fill (this, real_path, loc,
						 xattr_req, &buf);
        }

parent:
        if (loc->parent) {
                pathdup = gf_strdup (real_path);
                GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

                parentpath = dirname (pathdup);

                op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "post-operation lstat on parent of %s failed: %s",
                                loc->path, strerror (op_errno));
                        goto out;
                }
        }

        op_ret = entry_ret;
out:
        if (pathdup)
                GF_FREE (pathdup);

        if (xattr)
                dict_ref (xattr);

        STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
                             (loc)?loc->inode:NULL, &buf, xattr, &postparent);

        if (xattr)
                dict_unref (xattr);

        return 0;
}


int32_t
posix_stat (call_frame_t *frame,
            xlator_t *this,
            loc_t *loc)
{
        struct iatt           buf       = {0,};
        char *                real_path = NULL;
        int32_t               op_ret    = -1;
        int32_t               op_errno  = 0;
        struct posix_private *priv      = NULL; 

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = posix_lstat_with_gfid (this, real_path, &buf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID();
        STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf);

        return 0;
}

static int
posix_do_chmod (xlator_t *this,
                const char *path,
                struct iatt *stbuf)
{
        int32_t ret = -1;
        mode_t  mode = 0;

        mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
        ret = lchmod (path, mode);
        if ((ret == -1) && (errno == ENOSYS)) {
                ret = chmod (path, mode);
        }

        return ret;
}

static int
posix_do_chown (xlator_t *this,
                const char *path,
                struct iatt *stbuf,
                int32_t valid)
{
        int32_t ret = -1;
        uid_t uid = -1;
        gid_t gid = -1;

        if (valid & GF_SET_ATTR_UID)
                uid = stbuf->ia_uid;

        if (valid & GF_SET_ATTR_GID)
                gid = stbuf->ia_gid;

        ret = lchown (path, uid, gid);

        return ret;
}

static int
posix_do_utimes (xlator_t *this,
                 const char *path,
                 struct iatt *stbuf)
{
        int32_t ret = -1;
        struct timeval tv[2]     = {{0,},{0,}};

        tv[0].tv_sec  = stbuf->ia_atime;
        tv[0].tv_usec = stbuf->ia_atime_nsec / 1000;
        tv[1].tv_sec  = stbuf->ia_mtime;
        tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000;

        ret = lutimes (path, tv);
        if ((ret == -1) && (errno == ENOSYS)) {
                ret = utimes (path, tv);
	}
        
        return ret;
}

int
posix_setattr (call_frame_t *frame, xlator_t *this,
               loc_t *loc, struct iatt *stbuf, int32_t valid)
{
        int32_t        op_ret    = -1;
        int32_t        op_errno  = 0;
        char *         real_path = 0;
        struct iatt    statpre     = {0,};
        struct iatt    statpost    = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = posix_lstat_with_gfid (this, real_path, &statpre);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "setattr (lstat) on %s failed: %s", real_path,
                        strerror (op_errno));
                goto out;
        }

        if (valid & GF_SET_ATTR_MODE) {
                op_ret = posix_do_chmod (this, real_path, stbuf);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "setattr (chmod) on %s failed: %s", real_path,
                                strerror (op_errno));
                        goto out;
                }
        }

        if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){
                op_ret = posix_do_chown (this, real_path, stbuf, valid);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "setattr (chown) on %s failed: %s", real_path,
                                strerror (op_errno));
                        goto out;
                }
        }

        if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
                op_ret = posix_do_utimes (this, real_path, stbuf);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "setattr (utimes) on %s failed: %s", real_path,
                                strerror (op_errno));
                        goto out;
                }
        }

        if (!valid) {
                op_ret = lchown (real_path, -1, -1);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "lchown (%s, -1, -1) failed => (%s)",
                                real_path, strerror (op_errno));

                        goto out;
                }
        }

        op_ret = posix_lstat_with_gfid (this, real_path, &statpost);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "setattr (lstat) on %s failed: %s", real_path,
                        strerror (op_errno));
                goto out;
        }

        op_ret = 0;

out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
                             &statpre, &statpost);

        return 0;
}

int32_t
posix_do_fchown (xlator_t *this,
                 int fd,
                 struct iatt *stbuf,
                 int32_t valid)
{
        int   ret      = -1;
        uid_t uid = -1;
        gid_t gid = -1;

        if (valid & GF_SET_ATTR_UID)
                uid = stbuf->ia_uid;

        if (valid & GF_SET_ATTR_GID)
                gid = stbuf->ia_gid;

        ret = fchown (fd, uid, gid);

        return ret;
}


int32_t
posix_do_fchmod (xlator_t *this,
                 int fd, struct iatt *stbuf)
{
        mode_t  mode = 0;

        mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type);
        return fchmod (fd, mode);
}

static int
posix_do_futimes (xlator_t *this,
                  int fd,
                  struct iatt *stbuf)
{
        errno = ENOSYS;
        return -1;
}

int
posix_fsetattr (call_frame_t *frame, xlator_t *this,
                fd_t *fd, struct iatt *stbuf, int32_t valid)
{
        int32_t        op_ret    = -1;
        int32_t        op_errno  = 0;
        struct iatt    statpre     = {0,};
        struct iatt    statpost    = {0,};
        struct posix_fd *pfd = NULL;
        uint64_t         tmp_pfd = 0;
        int32_t          ret = -1;

        DECLARE_OLD_FS_ID_VAR;

        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
			"pfd is NULL from fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpre);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "fsetattr (fstat) failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        if (valid & GF_SET_ATTR_MODE) {
                op_ret = posix_do_fchmod (this, pfd->fd, stbuf);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "fsetattr (fchmod) failed on fd=%p: %s",
                                fd, strerror (op_errno));
                        goto out;
                }
        }

        if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
                op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "fsetattr (fchown) failed on fd=%p: %s",
                                fd, strerror (op_errno));
                        goto out;
                }

        }

        if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
                op_ret = posix_do_futimes (this, pfd->fd, stbuf);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "fsetattr (futimes) on failed fd=%p: %s", fd,
                                strerror (op_errno));
                        goto out;
                }
        }

        if (!valid) {
                op_ret = fchown (pfd->fd, -1, -1);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "fchown (%d, -1, -1) failed => (%s)",
                                pfd->fd, strerror (op_errno));

                        goto out;
                }
        }

        op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpost);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "fsetattr (fstat) failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        op_ret = 0;

out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
                             &statpre, &statpost);

        return 0;
}

int32_t
posix_opendir (call_frame_t *frame, xlator_t *this,
               loc_t *loc, fd_t *fd)
{
        char *            real_path = NULL;
        int32_t           op_ret    = -1;
        int32_t           op_errno  = EINVAL;
        DIR *             dir       = NULL;
        struct posix_fd * pfd       = NULL;

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);
        VALIDATE_OR_GOTO (loc->path, out);
        VALIDATE_OR_GOTO (fd, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        dir = opendir (real_path);

        if (dir == NULL) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "opendir failed on %s: %s",
			loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = dirfd (dir);
	if (op_ret < 0) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "dirfd() failed on %s: %s",
			loc->path, strerror (op_errno));
		goto out;
	}

        pfd = GF_CALLOC (1, sizeof (*fd), gf_posix_mt_posix_fd);
        if (!pfd) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

        pfd->dir = dir;
        pfd->fd = dirfd (dir);
        pfd->path = gf_strdup (real_path);
        if (!pfd->path) {
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

	fd_ctx_set (fd, this, (uint64_t)(long)pfd);

        op_ret = 0;

 out:
        if (op_ret == -1) {
                if (dir) {
                        closedir (dir);
                        dir = NULL;
                }
                if (pfd) {
                        if (pfd->path)
                                GF_FREE (pfd->path);
                        GF_FREE (pfd);
                        pfd = NULL;
                }
        }

        SET_TO_OLD_FS_ID ();
        STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
        return 0;
}

int32_t
posix_releasedir (xlator_t *this,
		  fd_t *fd)
{
        struct posix_fd * pfd      = NULL;
	uint64_t          tmp_pfd  = 0;
        int               ret      = 0;

        struct posix_private *priv = NULL;

        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        ret = fd_ctx_del (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd from fd=%p is NULL", fd);
                goto out;
        }

	pfd = (struct posix_fd *)(long)tmp_pfd;
        if (!pfd->dir) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd->dir is NULL for fd=%p path=%s",
                        fd, pfd->path ? pfd->path : "<NULL>");
                goto out;
        }

        priv = this->private;

        if (!pfd->path) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd->path was NULL. fd=%p pfd=%p",
                        fd, pfd);
        }

        pthread_mutex_lock (&priv->janitor_lock);
        {
                INIT_LIST_HEAD (&pfd->list);
                list_add_tail (&pfd->list, &priv->janitor_fds);
                pthread_cond_signal (&priv->janitor_cond);
        }
        pthread_mutex_unlock (&priv->janitor_lock);

 out:
        return 0;
}


int32_t
posix_readlink (call_frame_t *frame, xlator_t *this,
                loc_t *loc, size_t size)
{
        char *  dest      = NULL;
        int32_t op_ret    = -1;
        int32_t lstat_ret = -1;
        int32_t op_errno  = 0;
        char *  real_path = NULL;
        struct iatt stbuf = {0,};

        DECLARE_OLD_FS_ID_VAR;

	VALIDATE_OR_GOTO (frame, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);

        dest = alloca (size + 1);

        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = readlink (real_path, dest, size);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "readlink on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        dest[op_ret] = 0;

        lstat_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if (lstat_ret == -1) {
                op_ret = -1;
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat on %s failed: %s", loc->path,
                        strerror (op_errno));
                goto out;
        }

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf);

        return 0;
}


int
posix_mknod (call_frame_t *frame, xlator_t *this,
             loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
{
	int                   tmp_fd      = 0;
        int32_t               op_ret      = -1;
        int32_t               op_errno    = 0;
        char                 *real_path   = 0;
        struct iatt           stbuf       = { 0, };
        char                  was_present = 1;
        struct posix_private *priv        = NULL;
        gid_t                 gid         = 0;
        char                 *pathdup   = NULL;
        struct iatt           preparent = {0,};
        struct iatt           postparent = {0,};
        char                 *parentpath = NULL;

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        gid = frame->root->gid;

        op_ret = setgid_override (this, real_path, &gid);
        if (op_ret < 0)
                goto out;

        SET_FS_ID (frame->root->uid, gid);
        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = mknod (real_path, mode, dev);

        if (op_ret == -1) {
                op_errno = errno;
		if ((op_errno == EINVAL) && S_ISREG (mode)) {
			/* Over Darwin, mknod with (S_IFREG|mode)
			   doesn't work */
			tmp_fd = creat (real_path, mode);
			if (tmp_fd == -1)
				goto out;
			close (tmp_fd);
		} else {

			gf_log (this->name, GF_LOG_ERROR,
				"mknod on %s failed: %s", loc->path,
				strerror (op_errno));
			goto out;
		}
        }

        op_ret = posix_gfid_set (this, real_path, params);

#ifndef HAVE_SET_FSID
        op_ret = lchown (real_path, frame->root->uid, gid);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lchown on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }
#endif

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "mknod on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        if (pathdup)
                GF_FREE (pathdup);

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
                             (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);

        if ((op_ret == -1) && (!was_present)) {
                unlink (real_path);
        }

        return 0;
}


static int
janitor_walker (const char *fpath, const struct stat *sb,
                int typeflag, struct FTW *ftwbuf)
{
        switch (sb->st_mode & S_IFMT) {
        case S_IFREG:
        case S_IFBLK:
        case S_IFLNK:
        case S_IFCHR:
        case S_IFIFO:
        case S_IFSOCK:
                gf_log (THIS->name, GF_LOG_TRACE,
                        "unlinking %s", fpath);
                unlink (fpath);
                break;

        case S_IFDIR:
                if (ftwbuf->level) { /* don't remove top level dir */
                        gf_log (THIS->name, GF_LOG_TRACE,
                                "removing directory %s", fpath);

                        rmdir (fpath);
                }
                break;
        }

        return 0;   /* 0 = FTW_CONTINUE */
}


static struct posix_fd *
janitor_get_next_fd (xlator_t *this)
{
        struct posix_private *priv = NULL;
        struct posix_fd *pfd = NULL;

        struct timespec timeout;

        priv = this->private;

        pthread_mutex_lock (&priv->janitor_lock);
        {
                if (list_empty (&priv->janitor_fds)) {
                        time (&timeout.tv_sec);
                        timeout.tv_sec += priv->janitor_sleep_duration;
                        timeout.tv_nsec = 0;

                        pthread_cond_timedwait (&priv->janitor_cond,
                                                &priv->janitor_lock,
                                                &timeout);
                        goto unlock;
                }

                pfd = list_entry (priv->janitor_fds.next, struct posix_fd,
                                  list);

                list_del (priv->janitor_fds.next);
        }
unlock:
        pthread_mutex_unlock (&priv->janitor_lock);

        return pfd;
}


static void *
posix_janitor_thread_proc (void *data)
{
        xlator_t *            this = NULL;
        struct posix_private *priv = NULL;
        struct posix_fd *pfd;

        time_t now;

        this = data;
        priv = this->private;

        THIS = this;

        while (1) {
                time (&now);
                if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
                        gf_log (this->name, GF_LOG_TRACE,
                                "janitor cleaning out /" GF_REPLICATE_TRASH_DIR);

                        nftw (priv->trash_path,
                              janitor_walker,
                              32,
                              FTW_DEPTH | FTW_PHYS);

                        priv->last_landfill_check = now;
                }

                pfd = janitor_get_next_fd (this);
                if (pfd) {
                        if (pfd->dir == NULL) {
                                gf_log (this->name, GF_LOG_TRACE,
                                        "janitor: closing file fd=%d", pfd->fd);
                                close (pfd->fd);
                        } else {
                                gf_log (this->name, GF_LOG_TRACE,
                                        "janitor: closing dir fd=%p", pfd->dir);
                                closedir (pfd->dir);
                        }

                        if (pfd->path)
                                GF_FREE (pfd->path);

                        GF_FREE (pfd);
                }
        }

        return NULL;
}


static void
posix_spawn_janitor_thread (xlator_t *this)
{
        struct posix_private *priv = NULL;
        int ret = 0;

        priv = this->private;

        LOCK (&priv->lock);
        {
                if (!priv->janitor_present) {
                        ret = pthread_create (&priv->janitor, NULL,
                                              posix_janitor_thread_proc, this);

                        if (ret < 0) {
                                gf_log (this->name, GF_LOG_ERROR,
                                        "spawning janitor thread failed: %s",
                                        strerror (errno));
                                goto unlock;
                        }

                        priv->janitor_present = _gf_true;
                }
        }
unlock:
        UNLOCK (&priv->lock);
}


int
posix_mkdir (call_frame_t *frame, xlator_t *this,
             loc_t *loc, mode_t mode, dict_t *params)
{
        int32_t               op_ret      = -1;
        int32_t               op_errno    = 0;
        char                 *real_path   = NULL;
        struct iatt           stbuf       = {0, };
        char                  was_present = 1;
        struct posix_private *priv        = NULL;
        gid_t                 gid         = 0;
        char                 *pathdup   = NULL;
        char                 *parentpath = NULL;
        struct iatt           preparent = {0,};
        struct iatt           postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        gid = frame->root->gid;

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)) {
                was_present = 0;
        }

        op_ret = setgid_override (this, real_path, &gid);
        if (op_ret < 0)
                goto out;

        SET_FS_ID (frame->root->uid, gid);
        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = mkdir (real_path, mode);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "mkdir of %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        op_ret = posix_gfid_set (this, real_path, params);

#ifndef HAVE_SET_FSID
        op_ret = chown (real_path, frame->root->uid, gid);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "chown on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }
#endif

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat on %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        if (pathdup)
                GF_FREE (pathdup);

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
                             (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);

        if ((op_ret == -1) && (!was_present)) {
                unlink (real_path);
        }

        return 0;
}


int32_t
posix_unlink (call_frame_t *frame, xlator_t *this,
              loc_t *loc)
{
        int32_t                  op_ret    = -1;
        int32_t                  op_errno  = 0;
        char                    *real_path = NULL;
        char                    *pathdup   = NULL;
        char                    *parentpath = NULL;
        int32_t                  fd = -1;
        struct posix_private    *priv      = NULL;
        struct iatt            preparent = {0,};
        struct iatt            postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        priv = this->private;
        if (priv->background_unlink) {
                if (IA_ISREG (loc->inode->ia_type)) {
                        fd = open (real_path, O_RDONLY);
                        if (fd == -1) {
                                op_ret = -1;
                                op_errno = errno;
                                gf_log (this->name, GF_LOG_ERROR,
                                        "open of %s failed: %s", loc->path,
                                        strerror (op_errno));
                                goto out;
                        }
                }
        }

        op_ret = unlink (real_path);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "unlink of %s failed: %s", loc->path, 
                        strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        if (pathdup)
                GF_FREE (pathdup);

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
                             &preparent, &postparent);

        if (fd != -1) {
                close (fd);
        }

        return 0;
}

int32_t
posix_rmdir (call_frame_t *frame, xlator_t *this,
             loc_t *loc)
{
        int32_t op_ret    = -1;
        int32_t op_errno  = 0;
        char *  real_path = NULL;
        char *  pathdup   = NULL;
        char *  parentpath = NULL;
        struct iatt   preparent = {0,};
        struct iatt   postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = rmdir (real_path);
        op_errno = errno;

	if (op_errno == EEXIST)
		/* Solaris sets errno = EEXIST instead of ENOTEMPTY */
		op_errno = ENOTEMPTY;

        /* No need to log a common error as ENOTEMPTY */
        if (op_ret == -1 && op_errno != ENOTEMPTY) {
                gf_log (this->name, GF_LOG_ERROR,
                        "rmdir of %s failed: %s", loc->path, 
                        strerror (op_errno));
        }

        if (op_ret == -1)
                goto out;

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

 out:
        if (pathdup)
                GF_FREE (pathdup);

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
                             &preparent, &postparent);

        return 0;
}


int
posix_symlink (call_frame_t *frame, xlator_t *this,
               const char *linkname, loc_t *loc, dict_t *params)
{
        int32_t               op_ret      = -1;
        int32_t               op_errno    = 0;
        char *                real_path   = 0;
        struct iatt           stbuf       = { 0, };
        struct posix_private *priv        = NULL;
        gid_t                 gid         = 0;
        char                  was_present = 1; 
        char                 *pathdup   = NULL;
        char                 *parentpath = NULL;
        struct iatt           preparent = {0,};
        struct iatt           postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (linkname, out);
        VALIDATE_OR_GOTO (loc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)){
                was_present = 0;
        }

        gid = frame->root->gid;

        op_ret = setgid_override (this, real_path, &gid);
        if (op_ret < 0)
                goto out;

        SET_FS_ID (frame->root->uid, gid);
        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = symlink (linkname, real_path);

        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "symlink of %s --> %s failed: %s",
                        loc->path, linkname, strerror (op_errno));
                goto out;
        }

        op_ret = posix_gfid_set (this, real_path, params);

#ifndef HAVE_SET_FSID
        op_ret = lchown (real_path, frame->root->uid, gid);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lchown failed on %s: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }
#endif
        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat failed on %s: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }
 
        op_ret = 0;

 out:
        if (pathdup)
                GF_FREE (pathdup);

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno,
                             (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);

        if ((op_ret == -1) && (!was_present)) {
                unlink (real_path);
        }

        return 0;
}


int
posix_rename (call_frame_t *frame, xlator_t *this,
              loc_t *oldloc, loc_t *newloc)
{
        int32_t               op_ret       = -1;
        int32_t               op_errno     = 0;
        char                 *real_oldpath = NULL;
        char                 *real_newpath = NULL;
        struct iatt           stbuf        = {0, };
        struct posix_private *priv         = NULL;
        char                  was_present  = 1; 
        char                 *oldpathdup    = NULL;
        char                 *oldparentpath = NULL;
        char                 *newpathdup    = NULL;
        char                 *newparentpath = NULL;
        struct iatt           preoldparent  = {0, };
        struct iatt           postoldparent = {0, };
        struct iatt           prenewparent  = {0, };
        struct iatt           postnewparent = {0, };

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (oldloc, out);
        VALIDATE_OR_GOTO (newloc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
        MAKE_REAL_PATH (real_newpath, this, newloc->path);

        oldpathdup = gf_strdup (real_oldpath);
        GF_VALIDATE_OR_GOTO (this->name, oldpathdup, out);

        oldparentpath = dirname (oldpathdup);

        op_ret = posix_lstat_with_gfid (this, oldparentpath, &preoldparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        oldloc->path, strerror (op_errno));
                goto out;
        }

        newpathdup = gf_strdup (real_newpath);
        GF_VALIDATE_OR_GOTO (this->name, newpathdup, out);

        newparentpath = dirname (newpathdup);

        op_ret = posix_lstat_with_gfid (this, newparentpath, &prenewparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                      newloc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)){
                was_present = 0;
        }

        op_ret = rename (real_oldpath, real_newpath);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name,
			(op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
                        "rename of %s to %s failed: %s",
                        oldloc->path, newloc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat on %s failed: %s",
                        real_newpath, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, oldparentpath, &postoldparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        oldloc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, newparentpath, &postnewparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        newloc->path, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        if (oldpathdup)
                GF_FREE (oldpathdup);

        if (newpathdup)
                GF_FREE (newpathdup);


        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf,
                             &preoldparent, &postoldparent,
                             &prenewparent, &postnewparent);

        if ((op_ret == -1) && !was_present) {
                unlink (real_newpath);
        } 

        return 0;
}


int
posix_link (call_frame_t *frame, xlator_t *this,
            loc_t *oldloc, loc_t *newloc)
{
        int32_t               op_ret       = -1;
        int32_t               op_errno     = 0;
        char                 *real_oldpath = 0;
        char                 *real_newpath = 0;
        struct iatt           stbuf        = {0, };
        struct posix_private *priv         = NULL;
        char                  was_present  = 1;
        char                 *newpathdup   = NULL;
        char                 *newparentpath = NULL;
        struct iatt           preparent = {0,};
        struct iatt           postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (oldloc, out);
        VALIDATE_OR_GOTO (newloc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
        MAKE_REAL_PATH (real_newpath, this, newloc->path);

        op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)) {
                was_present = 0;
        }

        newpathdup  = gf_strdup (real_newpath);
        if (!newpathdup) {
                gf_log (this->name, GF_LOG_ERROR, "strdup failed");
                op_errno = ENOMEM;
                goto out;
        }

        newparentpath = dirname (newpathdup);
        op_ret = posix_lstat_with_gfid (this, newparentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
                        newparentpath, strerror (op_errno));
                goto out;
        }

        op_ret = link (real_oldpath, real_newpath);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "link %s to %s failed: %s",
                        oldloc->path, newloc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "lstat on %s failed: %s",
                        real_newpath, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, newparentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
                        newparentpath, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        if (newpathdup)
                GF_FREE (newpathdup);
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (link, frame, op_ret, op_errno,
                             (oldloc)?oldloc->inode:NULL, &stbuf, &preparent,
                             &postparent);

        if ((op_ret == -1) && (!was_present)) {
                unlink (real_newpath);
        }

        return 0;
}


int32_t
posix_truncate (call_frame_t *frame,
                xlator_t *this,
                loc_t *loc,
                off_t offset)
{
        int32_t               op_ret    = -1;
        int32_t               op_errno  = 0;
        char                 *real_path = 0;
        struct posix_private *priv      = NULL;
        struct iatt           prebuf    = {0,};
        struct iatt           postbuf   = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = posix_lstat_with_gfid (this, real_path, &prebuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = truncate (real_path, offset);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "truncate on %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, real_path, &postbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s",
                        real_path, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
                             &prebuf, &postbuf);

        return 0;
}


int32_t
posix_create (call_frame_t *frame, xlator_t *this,
              loc_t *loc, int32_t flags, mode_t mode,
              fd_t *fd, dict_t *params)
{
        int32_t                op_ret      = -1;
        int32_t                op_errno    = 0;
        int32_t                _fd         = -1;
        int                    _flags      = 0;
        char *                 real_path   = NULL;
        struct iatt            stbuf       = {0, };
        struct posix_fd *      pfd         = NULL;
        struct posix_private * priv        = NULL;
        char                   was_present = 1;  

        gid_t                  gid         = 0;
        char                  *pathdup   = NULL;
        char                  *parentpath = NULL;
        struct iatt            preparent = {0,};
        struct iatt            postparent = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (this->private, out);
        VALIDATE_OR_GOTO (loc, out);
        VALIDATE_OR_GOTO (fd, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        gid = frame->root->gid;

        op_ret = setgid_override (this, real_path, &gid);

        if (op_ret < 0) {
                goto out;
        }

        SET_FS_ID (frame->root->uid, gid);
        pathdup = gf_strdup (real_path);
        GF_VALIDATE_OR_GOTO (this->name, pathdup, out);

        parentpath = dirname (pathdup);

        op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

        if (!flags) {
                _flags = O_CREAT | O_RDWR | O_EXCL;
        }
        else {
                _flags = flags | O_CREAT;
        }

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)) {
                was_present = 0;
        }

        if (priv->o_direct)
                _flags |= O_DIRECT;

        _fd = open (real_path, _flags, mode);

        if (_fd == -1) {
                op_errno = errno;
                op_ret = -1;
                gf_log (this->name, GF_LOG_ERROR,
                        "open on %s failed: %s", loc->path,
                        strerror (op_errno));
                goto out;
        }

        op_ret = posix_gfid_set (this, real_path, params);

#ifndef HAVE_SET_FSID
        op_ret = chown (real_path, frame->root->uid, gid);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "chown on %s failed: %s",
			real_path, strerror (op_errno));
        }
#endif

        op_ret = posix_fstat_with_gfid (this, _fd, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "fstat on %d failed: %s", _fd, strerror (op_errno));
                goto out;
        }

        op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation lstat on parent of %s failed: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }

	op_ret = -1;
        pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);

        if (!pfd) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

        pfd->flags = flags;
        pfd->fd    = _fd;

	fd_ctx_set (fd, this, (uint64_t)(long)pfd);

        LOCK (&priv->lock);
        {
                priv->nr_files++;
        }
        UNLOCK (&priv->lock);

        op_ret = 0;

 out:
        if (pathdup)
                GF_FREE (pathdup);
        SET_TO_OLD_FS_ID ();

        if ((-1 == op_ret) && (_fd != -1)) {
                close (_fd);

                if (!was_present) {
                        unlink (real_path);
                }
        }

        STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
                             fd, (loc)?loc->inode:NULL, &stbuf, &preparent,
                             &postparent);

        return 0;
}

int32_t
posix_open (call_frame_t *frame, xlator_t *this,
            loc_t *loc, int32_t flags, fd_t *fd, int wbflags)
{
        int32_t               op_ret       = -1;
        int32_t               op_errno     = 0;
        char                 *real_path    = NULL;
        int32_t               _fd          = -1;
        struct posix_fd      *pfd          = NULL;
        struct posix_private *priv         = NULL;
        char                  was_present  = 1;
        gid_t                 gid          = 0;
        struct iatt           stbuf        = {0, };

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (this->private, out);
        VALIDATE_OR_GOTO (loc, out);
        VALIDATE_OR_GOTO (fd, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = setgid_override (this, real_path, &gid);
        if (op_ret < 0)
                goto out;

        SET_FS_ID (frame->root->uid, gid);

        if (priv->o_direct)
                flags |= O_DIRECT;

        op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
        if ((op_ret == -1) && (errno == ENOENT)) {
                was_present = 0;
        }

        _fd = open (real_path, flags, 0);
        if (_fd == -1) {
                op_ret   = -1;
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "open on %s: %s", real_path, strerror (op_errno));
                goto out;
        }

        pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);

        if (!pfd) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

        pfd->flags = flags;
        pfd->fd    = _fd;
        if (wbflags == GF_OPEN_FSYNC)
                pfd->flushwrites = 1;

	fd_ctx_set (fd, this, (uint64_t)(long)pfd);

#ifndef HAVE_SET_FSID
        if (flags & O_CREAT) {
                op_ret = chown (real_path, frame->root->uid, gid);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "chown on %s failed: %s",
				real_path, strerror (op_errno));
                        goto out;
                }
        }
#endif

        if (flags & O_CREAT) {
                op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR, "lstat on (%s) "
                                "failed: %s", real_path, strerror (op_errno));
                        goto out;
                }
        }

        LOCK (&priv->lock);
        {
                priv->nr_files++;
        }
        UNLOCK (&priv->lock);

        op_ret = 0;

 out:
        if (op_ret == -1) {
                if (_fd != -1) {
                        close (_fd);
                }
        }

        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);

        return 0;
}

#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
                                       (unsigned long)(~(bound - 1))))

int
posix_readv (call_frame_t *frame, xlator_t *this,
             fd_t *fd, size_t size, off_t offset)
{
	uint64_t               tmp_pfd    = 0;
        int32_t                op_ret     = -1;
        int32_t                op_errno   = 0;
        int                    _fd        = -1;
        struct posix_private * priv       = NULL;
        struct iobuf         * iobuf      = NULL;
        struct iobref        * iobref     = NULL;
        struct iovec           vec        = {0,};
        struct posix_fd *      pfd        = NULL;
        struct iatt            stbuf      = {0,};
        int                    align      = 1;
        int                    ret        = -1;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);
        VALIDATE_OR_GOTO (this->private, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
			"pfd is NULL from fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        if (!size) {
                op_errno = EINVAL;
                gf_log (this->name, GF_LOG_DEBUG, "size=%"GF_PRI_SIZET, size);
                goto out;
        }

        if (pfd->flags & O_DIRECT) {
                align = 4096;    /* align to page boundary */
        }

        iobuf = iobuf_get (this->ctx->iobuf_pool);
        if (!iobuf) {
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

        _fd = pfd->fd;
        op_ret = pread (_fd, iobuf->ptr, size, offset);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "read failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        LOCK (&priv->lock);
        {
                priv->read_value    += op_ret;
        }
        UNLOCK (&priv->lock);

        vec.iov_base = iobuf->ptr;
        vec.iov_len  = op_ret;

        iobref = iobref_new ();

        iobref_add (iobref, iobuf);

        /*
         *  readv successful, and we need to get the stat of the file
         *  we read from
         */

        op_ret = posix_fstat_with_gfid (this, _fd, &stbuf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "fstat failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        /* Hack to notify higher layers of EOF. */
        if (stbuf.ia_size == 0)
                op_errno = ENOENT;
        else if ((offset + vec.iov_len) == stbuf.ia_size)
                op_errno = ENOENT;

        op_ret = vec.iov_len;
out:

        STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
                             &vec, 1, &stbuf, iobref);

        if (iobref)
                iobref_unref (iobref);
        if (iobuf)
                iobuf_unref (iobuf);

        return 0;
}


int32_t
__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset)
{
        int32_t         op_ret = 0;
        int             idx = 0;
        int             retval = 0;
        off_t           internal_off = 0;

        if (!vector)
                return -EFAULT;

        internal_off = offset;
        for (idx = 0; idx < count; idx++) {
                retval = pwrite (fd, vector[idx].iov_base, vector[idx].iov_len,
                                 internal_off);
                if (retval == -1) {
                        op_ret = -errno;
                        goto err;
                }
                op_ret += retval;
                internal_off += retval;
        }

err:
        return op_ret;
}


int32_t
__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
                int odirect)
{
        int32_t         op_ret = 0;
        int             idx = 0;
        int             align = 4096;
        int             max_buf_size = 0;
        int             retval = 0;
        char            *buf = NULL;
        char            *alloc_buf = NULL;
        off_t           internal_off = 0;

        /* Check for the O_DIRECT flag during open() */
        if (!odirect)
                return __posix_pwritev (fd, vector, count, startoff);

        for (idx = 0; idx < count; idx++) {
                if (max_buf_size < vector[idx].iov_len)
                        max_buf_size = vector[idx].iov_len;
        }

        alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char);
        if (!alloc_buf) {
                op_ret = -errno;
                goto err;
        }

        internal_off = startoff;
        for (idx = 0; idx < count; idx++) {
                /* page aligned buffer */
                buf = ALIGN_BUF (alloc_buf, align);

                memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);

                /* not sure whether writev works on O_DIRECT'd fd */
                retval = pwrite (fd, buf, vector[idx].iov_len, internal_off);
                if (retval == -1) {
                        op_ret = -errno;
                        goto err;
                }

                op_ret += retval;
                internal_off += retval;
        }

err:
        if (alloc_buf)
                GF_FREE (alloc_buf);

        return op_ret;
}


int32_t
posix_writev (call_frame_t *frame, xlator_t *this,
              fd_t *fd, struct iovec *vector, int32_t count, off_t offset,
              struct iobref *iobref)
{
        int32_t                op_ret   = -1;
        int32_t                op_errno = 0;
        int                    _fd      = -1;
        struct posix_private * priv     = NULL;
        struct posix_fd *      pfd      = NULL;
        struct iatt            preop    = {0,};
        struct iatt            postop    = {0,};
        int                      ret      = -1;

	uint64_t  tmp_pfd   = 0;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);
        VALIDATE_OR_GOTO (vector, out);
        VALIDATE_OR_GOTO (this->private, out);

        priv = this->private;

        VALIDATE_OR_GOTO (priv, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
			"pfd is NULL from fd=%p", fd);
                op_errno = -ret;
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        _fd = pfd->fd;

        op_ret = posix_fstat_with_gfid (this, _fd, &preop);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation fstat failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        op_ret = __posix_writev (_fd, vector, count, offset,
                                 (pfd->flags & O_DIRECT));
        if (op_ret < 0) {
                op_errno = -op_ret;
                op_ret = -1;
                gf_log (this->name, GF_LOG_ERROR, "write failed: offset %"PRIu64
                        ", %s", offset, strerror (op_errno));
                goto out;
        }

        LOCK (&priv->lock);
        {
                priv->write_value    += op_ret;
        }
        UNLOCK (&priv->lock);

        if (op_ret >= 0) {
                /* wiretv successful, we also need to get the stat of
                 * the file we wrote to
                 */

                if (pfd->flushwrites) {
                        /* NOTE: ignore the error, if one occurs at this
                         * point */
                        fsync (_fd);
                }

                ret = posix_fstat_with_gfid (this, _fd, &postop);
                if (ret == -1) {
			op_ret = -1;
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR, 
                                "post-operation fstat failed on fd=%p: %s",
                                fd, strerror (op_errno));
                        goto out;
                }
        }

 out:

        STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop);

        return 0;
}


int32_t
posix_statfs (call_frame_t *frame, xlator_t *this,
              loc_t *loc)
{
        char *                 real_path = NULL;
        int32_t                op_ret    = -1;
        int32_t                op_errno  = 0;
        struct statvfs         buf       = {0, };
        struct posix_private * priv      = NULL;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);
        VALIDATE_OR_GOTO (this->private, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        priv = this->private;

        op_ret = statvfs (real_path, &buf);

        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, 
                        "statvfs failed on %s: %s",
                        real_path, strerror (op_errno));
                goto out;
        }

        if (!priv->export_statfs) {
                buf.f_blocks = 0;
                buf.f_bfree  = 0;
                buf.f_bavail = 0;
                buf.f_files  = 0;
                buf.f_ffree  = 0;
                buf.f_favail = 0;
        }

        op_ret = 0;

 out:
        STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf);
        return 0;
}


int32_t
posix_flush (call_frame_t *frame, xlator_t *this,
             fd_t *fd)
{
        int32_t           op_ret   = -1;
        int32_t           op_errno = 0;
        struct posix_fd * pfd      = NULL;
        int               ret      = -1;
	uint64_t          tmp_pfd  = 0;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL on fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        op_ret = 0;

 out:
        STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);

        return 0;
}


int32_t
posix_release (xlator_t *this,
	       fd_t *fd)
{
        struct posix_private * priv     = NULL;
        struct posix_fd *      pfd      = NULL;
        int                    ret      = -1;
	uint64_t               tmp_pfd  = 0;

        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        priv = this->private;

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL from fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        if (pfd->dir) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd->dir is %p (not NULL) for file fd=%p",
                        pfd->dir, fd);
        }

        pthread_mutex_lock (&priv->janitor_lock);
        {
                INIT_LIST_HEAD (&pfd->list);
                list_add_tail (&pfd->list, &priv->janitor_fds);
                pthread_cond_signal (&priv->janitor_cond);
        }
        pthread_mutex_unlock (&priv->janitor_lock);

        LOCK (&priv->lock);
        {
                priv->nr_files--;
        }
        UNLOCK (&priv->lock);

 out:
        return 0;
}


int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
             fd_t *fd, int32_t datasync)
{
        int32_t           op_ret   = -1;
        int32_t           op_errno = 0;
        int               _fd      = -1;
        struct posix_fd * pfd      = NULL;
        int               ret      = -1;
	uint64_t          tmp_pfd  = 0;
        struct iatt       preop = {0,};
        struct iatt       postop = {0,};

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);

#ifdef GF_DARWIN_HOST_OS
        /* Always return success in case of fsync in MAC OS X */
        op_ret = 0;
        goto out;
#endif

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG, 
                        "pfd not found in fd's ctx");
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        _fd = pfd->fd;

        op_ret = posix_fstat_with_gfid (this, _fd, &preop);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_DEBUG,
                        "pre-operation fstat failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        if (datasync) {
                ;
#ifdef HAVE_FDATASYNC
                op_ret = fdatasync (_fd);
                if (op_ret == -1) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "fdatasync on fd=%p failed: %s",
                                fd, strerror (errno));
                }
#endif
        } else {
                op_ret = fsync (_fd);
                if (op_ret == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "fsync on fd=%p failed: %s",
                                fd, strerror (op_errno));
                        goto out;
                }
        }

        op_ret = posix_fstat_with_gfid (this, _fd, &postop);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_DEBUG,
                        "post-operation fstat failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop);

        return 0;
}

static int gf_posix_xattr_enotsup_log;

int
set_file_contents (xlator_t *this, char *real_path,
                   data_pair_t *trav, int flags)
{
        char *      key                        = NULL;
        char        real_filepath[ZR_PATH_MAX] = {0,};
        int32_t     file_fd                    = -1;
        int         op_ret                     = 0;
        int         ret                        = -1;

        key = &(trav->key[15]);
        sprintf (real_filepath, "%s/%s", real_path, key);

        if (flags & XATTR_REPLACE) {
                /* if file exists, replace it
                 * else, error out */
                file_fd = open (real_filepath, O_TRUNC|O_WRONLY);

                if (file_fd == -1) {
                        goto create;
                }

                if (trav->value->len) {
                        ret = write (file_fd, trav->value->data,
				     trav->value->len);
                        if (ret == -1) {
                                op_ret = -errno;
                                gf_log (this->name, GF_LOG_ERROR,
					"write failed while doing setxattr "
					"for key %s on path %s: %s",
                                        key, real_filepath, strerror (errno));
                                goto out;
                        }

                        ret = close (file_fd);
                        if (ret == -1) {
                                op_ret = -errno;
                                gf_log (this->name, GF_LOG_ERROR,
                                        "close failed on %s: %s",
                                        real_filepath, strerror (errno));
                                goto out;
                        }
                }

        create: /* we know file doesn't exist, create it */

                file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644);

                if (file_fd == -1) {
                        op_ret = -errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "failed to open file %s with O_CREAT: %s",
                                key, strerror (errno));
                        goto out;
                }

                ret = write (file_fd, trav->value->data, trav->value->len);
                if (ret == -1) {
                        op_ret = -errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "write failed on %s while setxattr with "
				"key %s: %s",
                                real_filepath, key, strerror (errno));
                        goto out;
                }

                ret = close (file_fd);
                if (ret == -1) {
                        op_ret = -errno;
                        gf_log (this->name, GF_LOG_ERROR,
                                "close failed on %s while setxattr with "
				"key %s: %s",
                                real_filepath, key, strerror (errno));
                        goto out;
                }
        }

 out:
        return op_ret;
}

int
handle_pair (xlator_t *this, char *real_path,
             data_pair_t *trav, int flags)
{
        int sys_ret = -1;
        int ret     = 0;

        if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
                ret = set_file_contents (this, real_path, trav, flags);
        } else {
                sys_ret = sys_lsetxattr (real_path, trav->key,
                                         trav->value->data,
                                         trav->value->len, flags);

                if (sys_ret < 0) {
                        if (errno == ENOTSUP) {
                                GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
						    this->name,GF_LOG_WARNING,
						    "Extended attributes not "
						    "supported");
                        } else if (errno == ENOENT) {
                                gf_log (this->name, GF_LOG_ERROR,
                                        "setxattr on %s failed: %s", real_path,
                                        strerror (errno));
                        } else {

#ifdef GF_DARWIN_HOST_OS
				gf_log (this->name,
					((errno == EINVAL) ?
					 GF_LOG_DEBUG : GF_LOG_ERROR),
					"%s: key:%s error:%s",
					real_path, trav->key,
					strerror (errno));
#else /* ! DARWIN */
                                gf_log (this->name, GF_LOG_ERROR,
                                        "%s: key:%s error:%s",
                                        real_path, trav->key,
					strerror (errno));
#endif /* DARWIN */
                        }

                        ret = -errno;
                        goto out;
                }
        }
 out:
        return ret;
}

int32_t
posix_setxattr (call_frame_t *frame, xlator_t *this,
                loc_t *loc, dict_t *dict, int flags)
{
        int32_t       op_ret                  = -1;
        int32_t       op_errno                = 0;
        char *        real_path               = NULL;
        data_pair_t * trav                    = NULL;
        int           ret                     = -1;

        DECLARE_OLD_FS_ID_VAR;
        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);
        VALIDATE_OR_GOTO (dict, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        trav = dict->members_list;

        while (trav) {
                ret = handle_pair (this, real_path, trav, flags);
                if (ret < 0) {
                        op_errno = -ret;
                        goto out;
                }
                trav = trav->next;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);

        return 0;
}

int
get_file_contents (xlator_t *this, char *real_path,
                   const char *name, char **contents)
{
        char        real_filepath[ZR_PATH_MAX] = {0,};
        char *      key                        = NULL;
        int32_t     file_fd                    = -1;
        struct iatt stbuf                      = {0,};
        int         op_ret                     = 0;
        int         ret                        = -1;

        key = (char *) &(name[15]);
        sprintf (real_filepath, "%s/%s", real_path, key);

        op_ret = posix_lstat_with_gfid (this, real_filepath, &stbuf);
        if (op_ret == -1) {
                op_ret = -errno;
                gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
                        real_filepath, strerror (errno));
                goto out;
        }

        file_fd = open (real_filepath, O_RDONLY);

        if (file_fd == -1) {
                op_ret = -errno;
                gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
                        real_filepath, strerror (errno));
                goto out;
        }

        *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
                               gf_posix_mt_char);

        if (! *contents) {
                op_ret = -errno;
                gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                goto out;
        }

        ret = read (file_fd, *contents, stbuf.ia_size);
        if (ret <= 0) {
                op_ret = -1;
                gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s",
                        real_filepath, strerror (errno));
                goto out;
        }

        *contents[stbuf.ia_size] = '\0';

        op_ret = close (file_fd);
        file_fd = -1;
        if (op_ret == -1) {
                op_ret = -errno;
                gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
                        real_filepath, strerror (errno));
                goto out;
        }

 out:
        if (op_ret < 0) {
                if (*contents)
                        GF_FREE (*contents);
                if (file_fd != -1)
                        close (file_fd);
        }

        return op_ret;
}

/**
 * posix_getxattr - this function returns a dictionary with all the
 *                  key:value pair present as xattr. used for
 *                  both 'listxattr' and 'getxattr'.
 */
int32_t
posix_getxattr (call_frame_t *frame, xlator_t *this,
                loc_t *loc, const char *name)
{
        struct posix_private *priv  = NULL;
        int32_t  op_ret         = -1;
        int32_t  op_errno       = 0;
        int32_t  list_offset    = 0;
        size_t   size           = 0;
        size_t   remaining_size = 0;
        char     key[1024]      = {0,};
        char     host_buf[1024] = {0,};
        char *   value          = NULL;
        char *   list           = NULL;
        char *   real_path      = NULL;
        dict_t * dict           = NULL;
        char *   file_contents  = NULL;
        int      ret            = -1;

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);
        MAKE_REAL_PATH (real_path, this, loc->path);

        priv = this->private;

        if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name &&
	    ZR_FILE_CONTENT_REQUEST(name)) {
                ret = get_file_contents (this, real_path, name,
					 &file_contents);
                if (ret < 0) {
                        op_errno = -ret;
                        gf_log (this->name, GF_LOG_ERROR,
				"getting file contents failed: %s",
                                strerror (op_errno));
                        goto out;
                }
        }

        /* Get the total size */
        dict = get_new_dict ();
        if (!dict) {
                gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                goto out;
        }

	if (loc->inode && IA_ISREG (loc->inode->ia_type) && name &&
	    (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
                snprintf (host_buf, 1024, "%s:%s", priv->hostname,
                          real_path);
                ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY,
                                    host_buf);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "could not set value (%s) in dictionary",
                                host_buf);
                }
                goto done;
	}

        size = sys_llistxattr (real_path, NULL, 0);
        if (size == -1) {
                op_errno = errno;
                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
                                             this->name, GF_LOG_WARNING,
                                             "Extended attributes not "
					     "supported.");
                }
                else {
                        gf_log (this->name, GF_LOG_ERROR,
				"listxattr failed on %s: %s",
                                real_path, strerror (op_errno));
                }
                goto out;
        }

        if (size == 0)
                goto done;

        list = alloca (size + 1);
        if (!list) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                goto out;
        }

        size = sys_llistxattr (real_path, list, size);

        remaining_size = size;
        list_offset = 0;
        while (remaining_size > 0) {
                if (*(list + list_offset) == '\0')
                        break;

                strcpy (key, list + list_offset);
                op_ret = sys_lgetxattr (real_path, key, NULL, 0);
                if (op_ret == -1)
                        break;

                value = GF_CALLOC (op_ret + 1, sizeof(char),
                                   gf_posix_mt_char);
                if (!value) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                        goto out;
                }

                op_ret = sys_lgetxattr (real_path, key, value, op_ret);
                if (op_ret == -1) {
                        op_errno = errno;

                        break;
                }

                value [op_ret] = '\0';
                dict_set (dict, key, data_from_dynptr (value, op_ret));

                remaining_size -= strlen (key) + 1;
                list_offset += strlen (key) + 1;

        } /* while (remaining_size > 0) */

 done:
        op_ret = size;

        if (dict) {
                dict_ref (dict);
        }

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);

        if (dict)
                dict_unref (dict);

        return 0;
}


int32_t
posix_fgetxattr (call_frame_t *frame, xlator_t *this,
                 fd_t *fd, const char *name)
{
        int32_t           op_ret         = -1;
        int32_t           op_errno       = ENOENT;
        uint64_t          tmp_pfd        = 0;
        struct posix_fd * pfd            = NULL;
        int               _fd            = -1;
        int32_t           list_offset    = 0;
        size_t            size           = 0;
        size_t            remaining_size = 0;
        char              key[1024]      = {0,};
        char *            value          = NULL;
        char *            list           = NULL;
        dict_t *          dict           = NULL;
        int               ret            = -1;

        DECLARE_OLD_FS_ID_VAR;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        SET_FS_ID (frame->root->uid, frame->root->gid);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
			"pfd is NULL from fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        _fd = pfd->fd;

        /* Get the total size */
        dict = get_new_dict ();
        if (!dict) {
                gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                goto out;
        }

        size = sys_flistxattr (_fd, NULL, 0);
        if (size == -1) {
                op_errno = errno;
                if ((errno == ENOTSUP) || (errno == ENOSYS)) {
                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
                                             this->name, GF_LOG_WARNING,
                                             "Extended attributes not "
					     "supported.");
                }
                else {
                        gf_log (this->name, GF_LOG_ERROR,
				"listxattr failed on %p: %s",
                                fd, strerror (op_errno));
                }
                goto out;
        }

        if (size == 0)
                goto done;

        list = alloca (size + 1);
        if (!list) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                goto out;
        }

        size = sys_flistxattr (_fd, list, size);

        remaining_size = size;
        list_offset = 0;
        while (remaining_size > 0) {
                if(*(list + list_offset) == '\0')
                        break;

                strcpy (key, list + list_offset);
                op_ret = sys_fgetxattr (_fd, key, NULL, 0);
                if (op_ret == -1)
                        break;

                value = GF_CALLOC (op_ret + 1, sizeof(char),
                                   gf_posix_mt_char);
                if (!value) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
                        goto out;
                }

                op_ret = sys_fgetxattr (_fd, key, value, op_ret);
                if (op_ret == -1)
                        break;

                value [op_ret] = '\0';
                dict_set (dict, key, data_from_dynptr (value, op_ret));
                remaining_size -= strlen (key) + 1;
                list_offset += strlen (key) + 1;

        } /* while (remaining_size > 0) */

 done:
        op_ret = size;

        if (dict) {
                dict_ref (dict);
        }

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);

        if (dict)
                dict_unref (dict);

        return 0;
}


int
fhandle_pair (xlator_t *this, int fd,
              data_pair_t *trav, int flags)
{
        int sys_ret = -1;
        int ret     = 0;

        sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data,
                                 trav->value->len, flags);
        
        if (sys_ret < 0) {
                if (errno == ENOTSUP) {
                        GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
                                            this->name,GF_LOG_WARNING,
                                            "Extended attributes not "
                                            "supported");
                } else if (errno == ENOENT) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "fsetxattr on fd=%d failed: %s", fd,
                                strerror (errno));
                } else {
                        
#ifdef GF_DARWIN_HOST_OS
                        gf_log (this->name,
                                ((errno == EINVAL) ?
                                 GF_LOG_DEBUG : GF_LOG_ERROR),
                                "fd=%d: key:%s error:%s",
                                fd, trav->key,
                                strerror (errno));
#else /* ! DARWIN */
                        gf_log (this->name, GF_LOG_ERROR,
                                "fd=%d: key:%s error:%s",
                                fd, trav->key,
                                strerror (errno));
#endif /* DARWIN */
                }
                
                ret = -errno;
                goto out;
        }

out:
        return ret;
}


int32_t
posix_fsetxattr (call_frame_t *frame, xlator_t *this,
                 fd_t *fd, dict_t *dict, int flags)
{
        int32_t            op_ret       = -1;
        int32_t            op_errno     = 0;
        struct posix_fd *  pfd          = NULL;
        uint64_t           tmp_pfd      = 0;
        int                _fd          = -1;
        data_pair_t * trav              = NULL;
        int           ret               = -1;

        DECLARE_OLD_FS_ID_VAR;
        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);
        VALIDATE_OR_GOTO (dict, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
			"pfd is NULL from fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;
        _fd = pfd->fd;

        trav = dict->members_list;

        while (trav) {
                ret = fhandle_pair (this, _fd, trav, flags);
                if (ret < 0) {
                        op_errno = -ret;
                        goto out;
                }
                trav = trav->next;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);

        return 0;
}


int32_t
posix_removexattr (call_frame_t *frame, xlator_t *this,
                   loc_t *loc, const char *name)
{
        int32_t op_ret    = -1;
        int32_t op_errno  = 0;
        char *  real_path = NULL;

        DECLARE_OLD_FS_ID_VAR;

        MAKE_REAL_PATH (real_path, this, loc->path);

        SET_FS_ID (frame->root->uid, frame->root->gid);

        op_ret = sys_lremovexattr (real_path, name);

        if (op_ret == -1) {
                op_errno = errno;
		if (op_errno != ENOATTR && op_errno != EPERM)
			gf_log (this->name, GF_LOG_ERROR,
				"removexattr on %s: %s", loc->path,
                        strerror (op_errno));
                goto out;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
        return 0;
}


int32_t
posix_fsyncdir (call_frame_t *frame, xlator_t *this,
                fd_t *fd, int datasync)
{
        int32_t           op_ret   = -1;
        int32_t           op_errno = 0;
        struct posix_fd * pfd      = NULL;
        int               ret      = -1;
	uint64_t          tmp_pfd  = 0;

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                op_errno = -ret;
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL, fd=%p", fd);
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        op_ret = 0;

 out:
        STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);

        return 0;
}


void
posix_print_xattr (dict_t *this,
		   char *key,
		   data_t *value,
		   void *data)
{
	gf_log ("posix", GF_LOG_DEBUG,
		"(key/val) = (%s/%d)", key, data_to_int32 (value));
}


/**
 * add_array - add two arrays of 32-bit numbers (stored in network byte order)
 * dest = dest + src
 * @count: number of 32-bit numbers
 * FIXME: handle overflow
 */

static void
__add_array (int32_t *dest, int32_t *src, int count)
{
	int i = 0;
	for (i = 0; i < count; i++) {
		dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i]));
	}
}


/**
 * xattrop - xattr operations - for internal use by GlusterFS
 * @optype: ADD_ARRAY:
 *            dict should contain:
 *               "key" ==> array of 32-bit numbers
 */

int
do_xattrop (call_frame_t *frame, xlator_t *this,
            loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
{
	char            *real_path = NULL;
	int32_t         *array = NULL;
	int              size = 0;
	int              count = 0;

	int              op_ret = 0;
	int              op_errno = 0;

        int              ret = 0;
	int              _fd = -1;
        uint64_t         tmp_pfd = 0;
	struct posix_fd *pfd = NULL;

	data_pair_t     *trav = NULL;

        char *    path  = NULL;
        inode_t * inode = NULL;

	VALIDATE_OR_GOTO (frame, out);
	VALIDATE_OR_GOTO (xattr, out);
	VALIDATE_OR_GOTO (this, out);

	trav = xattr->members_list;

	if (fd) {
		ret = fd_ctx_get (fd, this, &tmp_pfd);
		if (ret < 0) {
			gf_log (this->name, GF_LOG_DEBUG,
				"failed to get pfd from fd=%p",
				fd);
			op_ret = -1;
			op_errno = EBADFD;
			goto out;
		}
                pfd = (struct posix_fd *)(long)tmp_pfd;
		_fd = pfd->fd;
	}

	if (loc && loc->path)
		MAKE_REAL_PATH (real_path, this, loc->path);

        if (loc) {
                path  = gf_strdup (loc->path);
                inode = loc->inode;
        } else if (fd) {
                inode = fd->inode;
        }

	while (trav && inode) {
		count = trav->value->len / sizeof (int32_t);
		array = GF_CALLOC (count, sizeof (int32_t),
                                   gf_posix_mt_int32_t);

                LOCK (&inode->lock);
                {
                        if (loc) {
                                size = sys_lgetxattr (real_path, trav->key, (char *)array, 
                                                      trav->value->len);
                        } else {
                                size = sys_fgetxattr (_fd, trav->key, (char *)array, 
                                                      trav->value->len);
                        }

                        op_errno = errno;
                        if ((size == -1) && (op_errno != ENODATA) && 
                            (op_errno != ENOATTR)) {
                                if (op_errno == ENOTSUP) {
                                        GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
                                                            this->name,GF_LOG_WARNING, 
                                                            "Extended attributes not "
                                                            "supported by filesystem");
                                } else 	{
                                        if (loc)
                                                gf_log (this->name, GF_LOG_ERROR,
                                                        "getxattr failed on %s while doing "
                                                        "xattrop: %s", path,
                                                        strerror (op_errno));
                                        else
                                                gf_log (this->name, GF_LOG_ERROR,
                                                        "fgetxattr failed on fd=%d while doing "
                                                        "xattrop: %s", _fd,
                                                        strerror (op_errno));
                                }

                                op_ret = -1;
                                goto unlock;
                        }

                        switch (optype) {

                        case GF_XATTROP_ADD_ARRAY:
                                __add_array (array, (int32_t *) trav->value->data, 
                                             trav->value->len / 4);
                                break;

                        default:
                                gf_log (this->name, GF_LOG_ERROR,
                                        "Unknown xattrop type (%d) on %s. Please send "
                                        "a bug report to gluster-devel@nongnu.org",
                                        optype, path);
                                op_ret = -1;
                                op_errno = EINVAL;
                                goto unlock;
                        }

                        if (loc) {
                                size = sys_lsetxattr (real_path, trav->key, array,
                                                      trav->value->len, 0);
                        } else {
                                size = sys_fsetxattr (_fd, trav->key, (char *)array,
                                                      trav->value->len, 0);
                        }
                }
        unlock:
                UNLOCK (&inode->lock);

                if (op_ret == -1)
                        goto out;

		op_errno = errno;
		if (size == -1) {
                        if (loc)
                                gf_log (this->name, GF_LOG_ERROR,
                                        "setxattr failed on %s while doing xattrop: "
                                        "key=%s (%s)", path,
                                        trav->key, strerror (op_errno));
                        else
                                gf_log (this->name, GF_LOG_ERROR,
                                        "fsetxattr failed on fd=%d while doing xattrop: "
                                        "key=%s (%s)", _fd,
                                        trav->key, strerror (op_errno));

			op_ret = -1;
			goto out;
		} else {
			size = dict_set_bin (xattr, trav->key, array, 
					     trav->value->len);

			if (size != 0) {
                                if (loc)
                                        gf_log (this->name, GF_LOG_DEBUG,
                                                "dict_set_bin failed (path=%s): "
                                                "key=%s (%s)", path,
                                                trav->key, strerror (-size));
                                else
                                        gf_log (this->name, GF_LOG_DEBUG,
                                                "dict_set_bin failed (fd=%d): "
                                                "key=%s (%s)", _fd,
                                                trav->key, strerror (-size));

				op_ret = -1;
				op_errno = EINVAL;
				goto out;
			}
			array = NULL;
		}

		array = NULL;
		trav = trav->next;
	}

out:
	if (array)
		GF_FREE (array);

        if (path)
                GF_FREE (path);

	STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr);
	return 0;
}


int
posix_xattrop (call_frame_t *frame, xlator_t *this,
               loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
{
        do_xattrop (frame, this, loc, NULL, optype, xattr);
        return 0;
}


int
posix_fxattrop (call_frame_t *frame, xlator_t *this,
		fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
{
        do_xattrop (frame, this, NULL, fd, optype, xattr);
        return 0;
}


int
posix_access (call_frame_t *frame, xlator_t *this,
              loc_t *loc, int32_t mask)
{
        int32_t                 op_ret    = -1;
        int32_t                 op_errno  = 0;
        char                   *real_path = NULL;

        DECLARE_OLD_FS_ID_VAR;
        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (loc, out);

        MAKE_REAL_PATH (real_path, this, loc->path);

        op_ret = access (real_path, mask & 07);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s",
                        loc->path, strerror (op_errno));
                goto out;
        }
        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
        return 0;
}


int32_t
posix_ftruncate (call_frame_t *frame, xlator_t *this,
                 fd_t *fd, off_t offset)
{
        int32_t               op_ret   = -1;
        int32_t               op_errno = 0;
        int                   _fd      = -1;
        struct iatt           preop    = {0,};
        struct iatt           postop   = {0,};
        struct posix_fd      *pfd      = NULL;
        int                   ret      = -1;
	uint64_t              tmp_pfd  = 0;
        struct posix_private *priv     = NULL;

        DECLARE_OLD_FS_ID_VAR;
        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL, fd=%p", fd);
                op_errno = -ret;
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        _fd = pfd->fd;

        op_ret = posix_fstat_with_gfid (this, _fd, &preop);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "pre-operation fstat failed on fd=%p: %s", fd,
                        strerror (op_errno));
                goto out;
        }

        op_ret = ftruncate (_fd, offset);

        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, 
                        "ftruncate failed on fd=%p: %s",
                        fd, strerror (errno));
                goto out;
        }

        op_ret = posix_fstat_with_gfid (this, _fd, &postop);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "post-operation fstat failed on fd=%p: %s",
                        fd, strerror (errno));
                goto out;
        }

        op_ret = 0;

 out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop);

        return 0;
}


int32_t
posix_fstat (call_frame_t *frame, xlator_t *this,
             fd_t *fd)
{
        int                   _fd      = -1;
        int32_t               op_ret   = -1;
        int32_t               op_errno = 0;
        struct iatt           buf      = {0,};
        struct posix_fd      *pfd      = NULL;
	uint64_t              tmp_pfd  = 0;
        int                   ret      = -1;
        struct posix_private *priv     = NULL; 

        DECLARE_OLD_FS_ID_VAR;
        SET_FS_ID (frame->root->uid, frame->root->gid);

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        priv = this->private;
        VALIDATE_OR_GOTO (priv, out);

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL, fd=%p", fd);
                op_errno = -ret;
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;

        _fd = pfd->fd;

        op_ret = posix_fstat_with_gfid (this, _fd, &buf);
        if (op_ret == -1) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s",
                        fd, strerror (op_errno));
                goto out;
        }

        op_ret = 0;

out:
        SET_TO_OLD_FS_ID ();

        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf);
        return 0;
}

static int gf_posix_lk_log;

int32_t
posix_lk (call_frame_t *frame, xlator_t *this,
          fd_t *fd, int32_t cmd, struct flock *lock)
{
        struct flock nullock = {0, };

        gf_posix_lk_log++;

	GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR,
			     "\"features/locks\" translator is "
			     "not loaded. You need to use it for proper "
                             "functioning of your application.");

        STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock);
        return 0;
}

int32_t
posix_inodelk (call_frame_t *frame, xlator_t *this,
	       const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
{
	gf_log (this->name, GF_LOG_CRITICAL,
		"\"features/locks\" translator is not loaded. "
		"You need to use it for proper functioning of GlusterFS");

        STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS);
        return 0;
}

int32_t
posix_finodelk (call_frame_t *frame, xlator_t *this,
		const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
{
	gf_log (this->name, GF_LOG_CRITICAL,
		"\"features/locks\" translator is not loaded. "
		"You need to use it for proper functioning of GlusterFS");

        STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS);
        return 0;
}


int32_t
posix_entrylk (call_frame_t *frame, xlator_t *this,
	       const char *volume, loc_t *loc, const char *basename, 
               entrylk_cmd cmd, entrylk_type type)
{
	gf_log (this->name, GF_LOG_CRITICAL,
		"\"features/locks\" translator is not loaded. "
		"You need to use it for proper functioning of GlusterFS");

        STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS);
        return 0;
}

int32_t
posix_fentrylk (call_frame_t *frame, xlator_t *this,
		const char *volume, fd_t *fd, const char *basename, 
                entrylk_cmd cmd, entrylk_type type)
{
	gf_log (this->name, GF_LOG_CRITICAL,
		"\"features/locks\" translator is not loaded. "
		" You need to use it for proper functioning of GlusterFS");

        STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS);
        return 0;
}


int32_t
posix_do_readdir (call_frame_t *frame, xlator_t *this,
                  fd_t *fd, size_t size, off_t off, int whichop)
{
	uint64_t              tmp_pfd        = 0;
        struct posix_fd      *pfd            = NULL;
        DIR                  *dir            = NULL;
        int                   ret            = -1;
        size_t                filled         = 0;
	int                   count          = 0;
        int32_t               op_ret         = -1;
        int32_t               op_errno       = 0;
        gf_dirent_t          *this_entry     = NULL;
	gf_dirent_t           entries;
        struct dirent        *entry          = NULL;
        off_t                 in_case        = -1;
        int32_t               this_size      = -1;
        char                 *real_path      = NULL;
        int                   real_path_len  = -1;
        char                 *entry_path     = NULL;
        int                   entry_path_len = -1;
        struct posix_private *priv           = NULL;
        struct iatt           stbuf          = {0, };
        char                  base_path[PATH_MAX] = {0,};

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

	INIT_LIST_HEAD (&entries.list);

        priv = this->private;

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL, fd=%p", fd);
                op_errno = -ret;
                goto out;
        }
	pfd = (struct posix_fd *)(long)tmp_pfd;
        if (!pfd->path) {
                op_errno = EBADFD;
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd does not have path set (possibly file "
			"fd, fd=%p)", fd);
                goto out;
        }

        real_path     = pfd->path;
        real_path_len = strlen (real_path);

        entry_path_len = real_path_len + NAME_MAX;
        entry_path     = alloca (entry_path_len);

        strncpy(base_path, POSIX_BASE_PATH(this), sizeof(base_path));
        base_path[strlen(base_path)] = '/';

        if (!entry_path) {
                op_errno = errno;
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                goto out;
        }

        strncpy (entry_path, real_path, entry_path_len);
        entry_path[real_path_len] = '/';

        dir = pfd->dir;

        if (!dir) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "dir is NULL for fd=%p", fd);
                op_errno = EINVAL;
                goto out;
        }


        if (!off) {
                rewinddir (dir);
        } else {
                seekdir (dir, off);
        }

        while (filled <= size) {
                in_case = telldir (dir);

                if (in_case == -1) {
                        op_errno = errno;
                        gf_log (this->name, GF_LOG_ERROR,
				"telldir failed on dir=%p: %s",
                                dir, strerror (errno));
                        goto out;
                }

                errno = 0;
                entry = readdir (dir);

                if (!entry) {
                        if (errno == EBADF) {
                                op_errno = errno;
                                gf_log (this->name, GF_LOG_DEBUG,
					"readdir failed on dir=%p: %s",
                                        dir, strerror (op_errno));
                                goto out;
                        }
                        break;
                }

                if ((!strcmp(real_path, base_path))
                    && (!strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)))
                        continue;

                this_size = max (sizeof (gf_dirent_t),
                                 sizeof (gfs3_dirplist))
                        + strlen (entry->d_name) + 1;

                if (this_size + filled > size) {
                        seekdir (dir, in_case);
                        break;
                }

                /* Device spanning requires that we have a stat buf for the
                 * file so we need to perform a stat on the two conditions
                 * below.
                 */
                if ((whichop == GF_FOP_READDIRP) || (priv->span_devices)) {
                        strcpy (entry_path + real_path_len + 1, entry->d_name);
                        op_ret = posix_lstat_with_gfid (this, entry_path, &stbuf);
                        if (-1 == op_ret)
                                continue;
                } else
                        stbuf.ia_ino = entry->d_ino;

                /* So at this point stbuf ino is either:
                 * a. the original inode number got from entry, in case this
                 * was a readdir fop or if device spanning was disabled.
                 *
                 * b. the scaled inode number, if device spanning was enabled
                 * or this was a readdirp fop.
                 */
                entry->d_ino = stbuf.ia_ino;

                this_entry = gf_dirent_for_name (entry->d_name);

                if (!this_entry) {
                        gf_log (this->name, GF_LOG_ERROR,
                                "could not create gf_dirent for entry %s: (%s)",
                                entry->d_name, strerror (errno));
                        goto out;
                }
                this_entry->d_off = telldir (dir);
                this_entry->d_ino = entry->d_ino;
                this_entry->d_stat = stbuf;

                list_add_tail (&this_entry->list, &entries.list);

                filled += this_size;
                count ++;
        }

        op_ret = count;
        errno = 0;
        if ((!readdir (dir) && (errno == 0)))
                op_errno = ENOENT;

 out:
        STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries);

	gf_dirent_free (&entries);

        return 0;
}


int32_t
posix_readdir (call_frame_t *frame, xlator_t *this,
               fd_t *fd, size_t size, off_t off)
{
        posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
        return 0;
}


int32_t
posix_readdirp (call_frame_t *frame, xlator_t *this,
                fd_t *fd, size_t size, off_t off)
{
        posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
        return 0;
}

int32_t
posix_priv (xlator_t *this)
{
        struct posix_private *priv = NULL;
        char  key_prefix[GF_DUMP_MAX_BUF_LEN];
        char  key[GF_DUMP_MAX_BUF_LEN];

        snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, 
                       this->name);
        gf_proc_dump_add_section(key_prefix);

        if (!this) 
                return 0;

        priv = this->private;

        if (!priv) 
                return 0;

        gf_proc_dump_build_key(key, key_prefix, "base_path");
        gf_proc_dump_write(key,"%s", priv->base_path);
        gf_proc_dump_build_key(key, key_prefix, "base_path_length");
        gf_proc_dump_write(key,"%d", priv->base_path_length);
        gf_proc_dump_build_key(key, key_prefix, "max_read");
        gf_proc_dump_write(key,"%d", priv->read_value);
        gf_proc_dump_build_key(key, key_prefix, "max_write");
        gf_proc_dump_write(key,"%d", priv->write_value);
        gf_proc_dump_build_key(key, key_prefix, "nr_files");
        gf_proc_dump_write(key,"%ld", priv->nr_files);

        return 0;
}

int32_t
posix_inode (xlator_t *this)
{
        return 0;
}


int32_t
posix_rchecksum (call_frame_t *frame, xlator_t *this,
                 fd_t *fd, off_t offset, int32_t len)
{
        char *buf = NULL;

        int       _fd      = -1;
        uint64_t  tmp_pfd  =  0;

        struct posix_fd *pfd  = NULL;

        int op_ret   = -1;
        int op_errno = 0;

        int ret = 0;

        int32_t weak_checksum = 0;
        uint8_t strong_checksum[MD5_DIGEST_LEN];

        VALIDATE_OR_GOTO (frame, out);
        VALIDATE_OR_GOTO (this, out);
        VALIDATE_OR_GOTO (fd, out);

        memset (strong_checksum, 0, MD5_DIGEST_LEN);
        buf = GF_CALLOC (1, len, gf_posix_mt_char);

        if (!buf) {
                op_errno = ENOMEM;
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory");
                goto out;
        }

        ret = fd_ctx_get (fd, this, &tmp_pfd);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pfd is NULL, fd=%p", fd);
                op_errno = -ret;
                goto out;
        }
	pfd = (struct posix_fd *)(long) tmp_pfd;

        _fd = pfd->fd;

        ret = pread (_fd, buf, len, offset);
        if (ret < 0) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "pread of %d bytes returned %d (%s)",
                        len, ret, strerror (errno));

                op_errno = errno;
                goto out;
        }

        weak_checksum = gf_rsync_weak_checksum (buf, len);
        gf_rsync_strong_checksum (buf, len, strong_checksum);

        GF_FREE (buf);

        op_ret = 0;
out:
        STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
                             weak_checksum, strong_checksum);
        return 0;
}


/**
 * notify - when parent sends PARENT_UP, send CHILD_UP event from here
 */
int32_t
notify (xlator_t *this,
        int32_t event,
        void *data,
        ...)
{
        switch (event)
                {
                case GF_EVENT_PARENT_UP:
                        {
                                /* Tell the parent that posix xlator is up */
                                default_notify (this, GF_EVENT_CHILD_UP, data);
                        }
                        break;
                default:
                        /* */
                        break;
                }
        return 0;
}

int32_t
mem_acct_init (xlator_t *this)
{
        int     ret = -1;

        if (!this)
                return ret;

        ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1);
        
        if (ret != 0) {
                gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
                                "failed");
                return ret;
        }

        return ret;
}

/**
 * init -
 */
int
init (xlator_t *this)
{
        struct posix_private  *_private      = NULL;
        data_t                *dir_data      = NULL;
	data_t                *tmp_data      = NULL;
        struct stat            buf           = {0,};
	gf_boolean_t           tmp_bool      = 0;
        uint64_t               time64        = 0;
        int                    dict_ret      = 0;
        int                    ret           = 0;
        int                    op_ret        = -1;
        int32_t                janitor_sleep = 0;

        dir_data = dict_get (this->options, "directory");

        if (this->children) {
                gf_log (this->name, GF_LOG_CRITICAL,
                        "FATAL: storage/posix cannot have subvolumes");
                ret = -1;
                goto out;
        }

	if (!this->parents) {
		gf_log (this->name, GF_LOG_WARNING,
			"Volume is dangling. Please check the volume file.");
	}

        if (!dir_data) {
                gf_log (this->name, GF_LOG_CRITICAL,
                        "Export directory not specified in volume file.");
                ret = -1;
                goto out;
        }

        umask (000); // umask `masking' is done at the client side

        /* Check whether the specified directory exists, if not log it. */
        op_ret = stat (dir_data->data, &buf);
        if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) {
                gf_log (this->name, GF_LOG_ERROR,
                        "Directory '%s' doesn't exist, exiting.",
			dir_data->data);
                ret = -1;
                goto out;
        }


        /* Check for Extended attribute support, if not present, log it */
        op_ret = sys_lsetxattr (dir_data->data,
			    "trusted.glusterfs.test", "working", 8, 0);
        if (op_ret < 0) {
		tmp_data = dict_get (this->options,
				     "mandate-attribute");
		if (tmp_data) {
			if (gf_string2boolean (tmp_data->data,
					       &tmp_bool) == -1) {
				gf_log (this->name, GF_LOG_ERROR,
					"wrong option provided for key "
					"\"mandate-attribute\"");
				ret = -1;
				goto out;
			}
			if (!tmp_bool) {
				gf_log (this->name, GF_LOG_WARNING,
					"Extended attribute not supported, "
					"starting as per option");
			} else {
				gf_log (this->name, GF_LOG_CRITICAL,
					"Extended attribute not supported, "
					"exiting.");
				ret = -1;
				goto out;
			}
		} else {
			gf_log (this->name, GF_LOG_CRITICAL,
				"Extended attribute not supported, exiting.");
			ret = -1;
			goto out;
		}
        }

        _private = GF_CALLOC (1, sizeof (*_private),
                              gf_posix_mt_posix_private);
        if (!_private) {
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                ret = -1;
                goto out;
        }

        _private->base_path = gf_strdup (dir_data->data);
        _private->base_path_length = strlen (_private->base_path);

        _private->trash_path = GF_CALLOC (1, _private->base_path_length
                                          + strlen ("/")
                                          + strlen (GF_REPLICATE_TRASH_DIR)
                                          + 1,
                                          gf_posix_mt_trash_path);

        if (!_private->trash_path) {
                gf_log (this->name, GF_LOG_ERROR,
                        "Out of memory.");
                ret = -1;
                goto out;
        }

        strncpy (_private->trash_path, _private->base_path, _private->base_path_length);
        strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR);

        LOCK_INIT (&_private->lock);

        ret = dict_get_str (this->options, "hostname", &_private->hostname);
        if (ret) {
                _private->hostname = GF_CALLOC (256, sizeof (char),
                                                gf_common_mt_char);
                if (!_private->hostname) {
                        gf_log (this->name, GF_LOG_ERROR, "not enough memory");
                        goto out;
                }
                ret = gethostname (_private->hostname, 256);
                if (ret < 0) {
                        gf_log (this->name, GF_LOG_WARNING,
                                "could not find hostname (%s)", strerror (errno));
                }
        }

        _private->export_statfs = 1;
        tmp_data = dict_get (this->options, "export-statfs-size");
        if (tmp_data) {
		if (gf_string2boolean (tmp_data->data,
				       &_private->export_statfs) == -1) {
			ret = -1;
			gf_log (this->name, GF_LOG_ERROR,
				"'export-statfs-size' takes only boolean "
				"options");
			goto out;
		}
                if (!_private->export_statfs)
                        gf_log (this->name, GF_LOG_DEBUG,
				"'statfs()' returns dummy size");
        }

        _private->background_unlink = 0;
        tmp_data = dict_get (this->options, "background-unlink");
        if (tmp_data) {
		if (gf_string2boolean (tmp_data->data,
				       &_private->background_unlink) == -1) {
			ret = -1;
			gf_log (this->name, GF_LOG_ERROR,
				"'background-unlink' takes only boolean "
				"options");
			goto out;
		}

                if (_private->background_unlink)
                        gf_log (this->name, GF_LOG_DEBUG,
				"unlinks will be performed in background");
        }

        tmp_data = dict_get (this->options, "o-direct");
        if (tmp_data) {
		if (gf_string2boolean (tmp_data->data,
				       &_private->o_direct) == -1) {
			ret = -1;
			gf_log (this->name, GF_LOG_ERROR,
				"wrong option provided for 'o-direct'");
			goto out;
		}
		if (_private->o_direct)
                        gf_log (this->name, GF_LOG_DEBUG,
                                "o-direct mode is enabled (O_DIRECT "
				"for every open)");
        }

        _private->num_devices_to_span = 1;

        tmp_data = dict_get (this->options, "span-devices");
        if (tmp_data) {
		if (gf_string2int32 (tmp_data->data,
                                     &_private->num_devices_to_span) == -1) {
			ret = -1;
			gf_log (this->name, GF_LOG_ERROR,
				"wrong option provided for 'span-devices'");
			goto out;
		}
		if (_private->num_devices_to_span > 1) {
                        gf_log (this->name, GF_LOG_NORMAL,
                                "spanning enabled accross %d mounts", 
                                _private->num_devices_to_span);
                        _private->span_devices = 1;
                }
                if (_private->num_devices_to_span < 1)
                        _private->num_devices_to_span = 1;
        }

        _private->st_device = GF_CALLOC (1, (sizeof (dev_t) *
                                         _private->num_devices_to_span),
                                         gf_posix_mt_posix_dev_t);
        
        /* Start with the base */
        _private->st_device[0] = buf.st_dev;

        _private->janitor_sleep_duration = 600;

	dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration",
                                   &janitor_sleep);
	if (dict_ret == 0) {
		gf_log (this->name, GF_LOG_DEBUG,
			"Setting janitor sleep duration to %d.",
			janitor_sleep);

		_private->janitor_sleep_duration = janitor_sleep;
	}

        LOCK_INIT (&_private->gen_lock);
        time64 = time (NULL);
        _private->gen_seq = (time64 << 32);

#ifndef GF_DARWIN_HOST_OS
        {
                struct rlimit lim;
                lim.rlim_cur = 1048576;
                lim.rlim_max = 1048576;

                if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
                        gf_log (this->name, GF_LOG_WARNING,
				"Failed to set 'ulimit -n "
				" 1048576': %s", strerror(errno));
                        lim.rlim_cur = 65536;
                        lim.rlim_max = 65536;

                        if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
                                gf_log (this->name, GF_LOG_WARNING,
					"Failed to set maximum allowed open "
                                        "file descriptors to 64k: %s", 
                                        strerror(errno));
                        }
                        else {
                                gf_log (this->name, GF_LOG_NORMAL,
					"Maximum allowed open file descriptors "
                                        "set to 65536");
                        }
                }
        }
#endif
        this->private = (void *)_private;

        pthread_mutex_init (&_private->janitor_lock, NULL);
        pthread_cond_init (&_private->janitor_cond, NULL);
        INIT_LIST_HEAD (&_private->janitor_fds);

        posix_spawn_janitor_thread (this);
 out:
        return ret;
}

void
fini (xlator_t *this)
{
        struct posix_private *priv = this->private;
        sys_lremovexattr (priv->base_path, "trusted.glusterfs.test");
        GF_FREE (priv);
        return;
}

struct xlator_dumpops dumpops = {
        .priv    = posix_priv,
        .inode   = posix_inode,
};

struct xlator_fops fops = {
        .lookup      = posix_lookup,
        .stat        = posix_stat,
        .opendir     = posix_opendir,
        .readdir     = posix_readdir,
        .readdirp    = posix_readdirp,
        .readlink    = posix_readlink,
        .mknod       = posix_mknod,
        .mkdir       = posix_mkdir,
        .unlink      = posix_unlink,
        .rmdir       = posix_rmdir,
        .symlink     = posix_symlink,
        .rename      = posix_rename,
        .link        = posix_link,
        .truncate    = posix_truncate,
        .create      = posix_create,
        .open        = posix_open,
        .readv       = posix_readv,
        .writev      = posix_writev,
        .statfs      = posix_statfs,
        .flush       = posix_flush,
        .fsync       = posix_fsync,
        .setxattr    = posix_setxattr,
        .fsetxattr   = posix_fsetxattr,
        .getxattr    = posix_getxattr,
        .fgetxattr   = posix_fgetxattr,
        .removexattr = posix_removexattr,
        .fsyncdir    = posix_fsyncdir,
        .access      = posix_access,
        .ftruncate   = posix_ftruncate,
        .fstat       = posix_fstat,
        .lk          = posix_lk,
	.inodelk     = posix_inodelk,
	.finodelk    = posix_finodelk,
	.entrylk     = posix_entrylk,
	.fentrylk    = posix_fentrylk,
        .rchecksum   = posix_rchecksum,
	.xattrop     = posix_xattrop,
	.fxattrop    = posix_fxattrop,
        .setattr     = posix_setattr,
        .fsetattr    = posix_fsetattr,
};

struct xlator_cbks cbks = {
	.release     = posix_release,
	.releasedir  = posix_releasedir,
	.forget      = posix_forget
};

struct volume_options options[] = {
	{ .key  = {"o-direct"},
	  .type = GF_OPTION_TYPE_BOOL },
	{ .key  = {"directory"},
	  .type = GF_OPTION_TYPE_PATH },
	{ .key  = {"hostname"},
	  .type = GF_OPTION_TYPE_ANY },
	{ .key  = {"export-statfs-size"},
	  .type = GF_OPTION_TYPE_BOOL },
	{ .key  = {"mandate-attribute"},
	  .type = GF_OPTION_TYPE_BOOL },
	{ .key  = {"span-devices"},
	  .type = GF_OPTION_TYPE_INT },
        { .key  = {"background-unlink"},
          .type = GF_OPTION_TYPE_BOOL },
        { .key  = {"janitor-sleep-duration"},
          .type = GF_OPTION_TYPE_INT },
	{ .key  = {NULL} }
};