module Mapred_job_config: sig .. end
type m_job_config
val extract_job_config : Netplex_types.config_file ->
(string * string) list ->
string list -> Mapred_def.mapred_job_config * m_job_config
let (jc, mjc) = extract_job_config cf args custom_params:
Extracts the job configuration from cf. The association list args
may contain overrides (the leftmost value is taken).
Returns the configuration as the object jc, and in a marshallable
representation mjc.
val mapred_job_config : m_job_config -> Mapred_def.mapred_job_config
val marshal : m_job_config -> string
val unmarshal : string -> m_job_config
val update_job_config : ?name:string ->
?input_dir:string ->
?input_dir_designation:Mapred_def.designation ->
?output_dir:string ->
?work_dir:string ->
?log_dir:string ->
?task_files:string list ->
?bigblock_size:int ->
?map_tasks:int ->
?merge_limit:int ->
?split_limit:int ->
?partitions:int ->
?enhanced_mapping:int ->
?phases:Mapred_def.phases ->
?custom:(string * string) list ->
?map_whole_files:bool ->
m_job_config -> m_job_config
val test_job_config : unit -> m_job_config
netplex {
...
mapredjob {
<name> = <value>;
...
}
}
The possible names are the method names of Mapred_def.mapred_job_config.
Each value should have the type expected for its name.
Example:
netplex {
mapredjob {
name = "my_job";
input_dir = "/input";
input_dir_designation = "deep_dir";
output_dir = "/output";
work_dir = "/work";
log_dir = "/log";
bigblock_size = 65536;
map_tasks = 100;
merge_limit = 4;
split_limit = 4;
partitions = 20;
phases = "map_sort_reduce";
}
}
All settings have default values:
- name is set to an automatically generated name
- bigblock_size is 16M
- map_tasks is 0 (meaning a good value is computed at runtime)
- merge_limit and split_limit are 4
- partitions is 1
- phases is `Map_sort_reduce
- map_whole_files is false