/* $Id: pfs_nn_internal.x 450 2011-10-11 12:30:40Z gerd $ -*- c -*- */

/* Internal interfaces used by the namenodes */
/* Shared type definitions used by the declarations in this file. */
#include "pfs_types.x"

/* Include guard; the matching #endif is at the end of the file.
 * Each directive has to start on its own line to be recognized by the
 * preprocessor.
 */
#ifndef PFS_NN_INTERNAL_X
#define PFS_NN_INTERNAL_X
/* ---------------------------------------------------------------------- */
/* Elect                                                                  */
/* ---------------------------------------------------------------------- */

program Elect {
    version V1 {

        /* null */
        void null(void) = 0;

        /* announce
         *
         * At cluster start the namenodes start calling the announce RPC
         * of all other namenodes - until they get a reply from each, or
         * until the end of the startup period is reached.
         *
         * If received within the startup period, the response is
         * ANN_ACCEPT if the announcement is better than the server to
         * which it is sent. Otherwise it is ANN_REJECT.
         *
         * If received after startup, the response is always ANN_REJECT,
         * and the sender must not start up.
         *
         * The response is ANN_SELF if the namenode instance sends the
         * announcement to itself.
         *
         * As all namenodes call announce of all other namenodes, the
         * question is whether there is a winner. If we assume there is a
         * total ordering between the announcements, there is a best
         * announcement if no two namenodes emit equal announcements.
         * So given the announcements are all distinct, there is a winner.
         */
        ann_result announce(announcement) = 1;

        /* set_coordinator
         *
         * When the end of the startup period is reached, one of the name
         * nodes sends set_coordinator to all other nodes, and becomes the
         * coordinator. The coordinator must be eligible by all other
         * nodes that actually respond. Also, the coordinator must have
         * the highest revision number, and among all nodes with the
         * highest revision number, the coordinator has the lowest rank.
         *
         * The first arg is the "host:port" name of the coordinator.
         * (Instead of the host name this string can also name the
         * coordinator by IP address if this is the preferred method of
         * referencing it.)
         *
         * The second arg is the clustername.
         *
         * The third arg is the revision identifier.
         *
         * There is right now no provision for the case that the
         * coordinator crashes - no other node is then automatically
         * elected. Best is to restart everything then.
         */
        void set_coordinator(mediumstring, mediumstring, mediumstring) = 2;

    } = 1;
} = 0x8000f001;
/* ---------------------------------------------------------------------- */
/* Nameslave                                                              */
/* ---------------------------------------------------------------------- */

program Nameslave {
    version V1 {
        /* This is what the non-coordinators implement */

        /* Note on numbering: procedure numbers 4 and 6 are not assigned
         * in this version (4 belongs to the disabled abort below), and
         * number 18 is declared out of numeric order. The numbers are
         * kept exactly as they are.
         */

        /* null */
        void null(void) = 0;

        /* begin_transaction
         *
         * Begin a transaction: clustername, expected_rev.
         * The 2nd arg is the expected revision string.
         */
        void begin_transaction(mediumstring, mediumstring) = 1;

        /* prepare_commit
         *
         * Result is true if the name database could be updated.
         */
        bool prepare_commit(void) = 2;

        /* commit
         *
         * The response of commit is the ACK in the extended 2-phase
         * commit protocol.
         */
        void commit(void) = 3;

        /* void abort(void) = 4; */

        /* Note that the names of the following RPCs correspond to
         * function names in Nn_db:
         */

        /* set_ds_cache
         *
         * set_ds_cache(l): calls Nn_db.ds_cache_init (`Datastores l)
         */
        void set_ds_cache(ds_info_list) = 5;

        /* push_inode_ins
         *
         * push_inode_ins(inode, ii)
         */
        void push_inode_ins(hyper, inodeinfo) = 7;

        /* push_inode_upd
         *
         * push_inode_upd(inode, ii)
         */
        void push_inode_upd(hyper, inodeinfo) = 8;

        /* push_inode_upd_time
         *
         * push_inode_upd_time(inode, mtime, ctime)
         */
        void push_inode_upd_time(hyper, time_opt, time_opt) = 18;

        /* push_inode_del
         *
         * push_inode_del(inode)
         */
        void push_inode_del(hyper) = 9;

        /* push_blockalloc_upd
         *
         * push_blockalloc_upd(datastore,blkidx,blkmap)
         */
        void push_blockalloc_upd(int, hyper, longstring) = 10;

        /* push_datastore_upd
         *
         * push_datastore_upd(id,identity,size,enabled): Updates the
         * datastore table. If the record is new, it is added.
         *
         * The blockalloc table is updated, too: For new stores, the rows
         * are added. If the size of the existing store is increased,
         * further rows are added.
         *
         * It is an error to decrease the size.
         */
        void push_datastore_upd(int, mediumstring, hyper, bool) = 11;

        /* push_datastore_del
         *
         * Deletes the datastore with this ID and all rows referencing it.
         */
        void push_datastore_del(int) = 12;

        /* push_revision_upd
         *
         * Sets the revision id in the db.
         */
        void push_revision_upd(mediumstring) = 13;

        /* push_inodeblocks_ins
         *
         * push_inodeblocks_ins(inode, bl)
         */
        void push_inodeblocks_ins(hyper, blocklist) = 14;

        /* push_inodeblocks_del
         *
         * push_inodeblocks_del(inode, blkidx, len)
         */
        void push_inodeblocks_del(hyper, hyper, hyper) = 15;

        /* push_names_ins
         *
         * push_names_ins(dir_inode, path, inode)
         */
        void push_names_ins(hyper, mediumstring, hyper) = 16;

        /* push_names_del
         *
         * push_names_del(dir_inode,path)
         */
        void push_names_del(hyper, mediumstring) = 17;

        /* push_admin_table
         *
         * push_admin_table(key,contents)
         */
        void push_admin_table(mediumstring, longstring) = 19;

    } = 1;
} = 0x8000f002;
/* ---------------------------------------------------------------------- */
/* Monitor                                                                */
/* ---------------------------------------------------------------------- */

program Monitor {
    version V1 {

        /* null */
        void null(void) = 0;

        /* start
         *
         * Starts the monitor: First, the state is loaded from the db.
         * Second, all known datanodes are discovered and enabled. Third,
         * the newsfeed for monitoring results is started. Fourth, the
         * Dn_admin interface is enabled.
         */
        void start(void) = 1;

    } = 1;
} = 0x8000f003;

/* Closes the PFS_NN_INTERNAL_X include guard opened at the top of the file. */
#endif