-
Notifications
You must be signed in to change notification settings - Fork 419
This wiki page serves as a reference for using the LibPostal API.
- libpostal_setup
- libpostal_setup_datadir
- libpostal_setup_parser
- libpostal_setup_parser_datadir
- libpostal_setup_language_classifier
- libpostal_setup_language_classifier_datadir
- libpostal_teardown
- libpostal_teardown_parser
- libpostal_teardown_language_classifier
- libpostal_place_languages
- libpostal_get_default_options
- libpostal_expand_address
- libpostal_expand_address_root
- libpostal_expansion_array_destroy
- libpostal_get_address_parser_default_options
- libpostal_parse_address
- libpostal_address_parser_response_destroy
- libpostal_get_near_dupe_hash_default_options
- libpostal_near_dupe_hashes
- libpostal_near_dupe_hashes_languages
- libpostal_get_default_duplicate_options
- libpostal_get_duplicate_options_with_languages
- libpostal_is_toponym_duplicate
- libpostal_is_name_duplicate
- libpostal_is_street_duplicate
- libpostal_is_house_number_duplicate
- libpostal_is_po_box_duplicate
- libpostal_is_unit_duplicate
- libpostal_is_floor_duplicate
- libpostal_is_postal_code_duplicate
- libpostal_get_default_fuzzy_duplicate_options
- libpostal_get_default_fuzzy_duplicate_options_with_languages
- libpostal_is_name_duplicate_fuzzy
- libpostal_is_street_duplicate_fuzzy
- libpostal_normalize_string
- libpostal_normalize_string_languages
- libpostal_tokenize
- libpostal_normalized_tokens
- libpostal_normalized_tokens_languages
- libpostal_parser_print_features
- libpostal_normalize_options_t
- libpostal_address_parser_options_t
- libpostal_address_parser_response_t
- libpostal_near_dupe_hash_options_t
- libpostal_duplicate_options_t
- libpostal_fuzzy_duplicate_options_t
- libpostal_fuzzy_duplicate_status_t
- libpostal_token_t
- libpostal_normalized_token_t
Before LibPostal's functions can be used, the library must be initialised. LibPostal requires its core to be initialised first, followed by the modules you wish to use.
bool libpostal_setup(void);
- return
bool
: Whether the operation was a success.
This will initialise LibPostal using the default directory set at compile time.
bool libpostal_setup_datadir(char *datadir);
- return
bool
: Whether the operation was a success. -
char *datadir
: The directory from which to load LibPostal.
This will initialise LibPostal using the directory specified within the datadir
variable.
bool libpostal_setup_parser(void);
- return
bool
: Whether the operation was a success.
This will initialise LibPostal's Address Parser using the default directory set at compile time.
bool libpostal_setup_parser_datadir(char *datadir);
- return
bool
: Whether the operation was a success. -
char *datadir
: The directory from which to load LibPostal.
This will initialise LibPostal's Address Parser using the directory specified within the datadir
variable.
bool libpostal_setup_language_classifier(void);
- return
bool
: Whether the operation was a success.
This will initialise LibPostal's Language Classifier using the default directory set at compile time.
bool libpostal_setup_language_classifier_datadir(char *datadir);
- return
bool
: Whether the operation was a success. -
char *datadir
: The directory from which to load LibPostal.
This will initialise LibPostal's Language Classifier using the directory specified within the datadir
variable.
These free up the resources used by LibPostal's modules.
void libpostal_teardown(void);
void libpostal_teardown_parser(void);
void libpostal_teardown_language_classifier(void);
#ToDo: Write Description
char **libpostal_place_languages(size_t num_components, char **labels, char **values, size_t *num_languages);
- return
char **
: A list of languages used by the components. -
size_t num_components
: The number of components to feed in. -
char **labels
: The component labels. -
char **values
: The component values. -
size_t *num_languages
: The returned number of languages.
This function analyses the components and predicts the languages used.
Before using the Address Expansion, you must initialize the Language Classifier (see Initialisers). #ToDo: Write Description
typedef struct libpostal_normalize_options {
// List of language codes
char **languages;
size_t num_languages;
uint16_t address_components;
// String options
bool latin_ascii;
bool transliterate;
bool strip_accents;
bool decompose;
bool lowercase;
bool trim_string;
bool drop_parentheticals;
bool replace_numeric_hyphens;
bool delete_numeric_hyphens;
bool split_alpha_from_numeric;
bool replace_word_hyphens;
bool delete_word_hyphens;
bool delete_final_periods;
bool delete_acronym_periods;
bool drop_english_possessives;
bool delete_apostrophes;
bool expand_numex;
bool roman_numerals;
} libpostal_normalize_options_t;
libpostal_normalize_options_t libpostal_get_default_options(void);
- return
libpostal_normalize_options_t
: #ToDo
char **libpostal_expand_address(char *input, libpostal_normalize_options_t options, size_t *n);
- return
char **
: #ToDo -
char *input
: #ToDo -
libpostal_normalize_options_t options
: #ToDo -
size_t *n
: #ToDo
char **libpostal_expand_address_root(char *input, libpostal_normalize_options_t options, size_t *n);
- return
char **
: #ToDo -
char *input
: #ToDo -
libpostal_normalize_options_t options
: #ToDo -
size_t *n
: #ToDo
void libpostal_expansion_array_destroy(char **expansions, size_t n);
-
char **expansions
: #ToDo -
size_t n
: #ToDo
Before using the Address Parser, you must initialize the Address Parser (see Initialisers).
typedef struct libpostal_address_parser_options {
char *language;
char *country;
} libpostal_address_parser_options_t;
typedef struct libpostal_address_parser_response {
size_t num_components;
char **components;
char **labels;
} libpostal_address_parser_response_t;
libpostal_address_parser_options_t libpostal_get_address_parser_default_options(void);
- return
libpostal_address_parser_options_t
: #ToDo
libpostal_address_parser_response_t *libpostal_parse_address(char *address, libpostal_address_parser_options_t options);
- return
libpostal_address_parser_response_t *
: #ToDo -
char *address
: #ToDo -
libpostal_address_parser_options_t options
: #ToDo
void libpostal_address_parser_response_destroy(libpostal_address_parser_response_t *self);
Before using Near-Duplicate Hashing, you must initialize the Language Classifier (see Initialisers).
#ToDo: Write Description
typedef struct libpostal_near_dupe_hash_options {
bool with_name;
bool with_address;
bool with_unit;
bool with_city_or_equivalent;
bool with_small_containing_boundaries;
bool with_postal_code;
bool with_latlon;
double latitude;
double longitude;
uint32_t geohash_precision;
bool name_and_address_keys;
bool name_only_keys;
bool address_only_keys;
} libpostal_near_dupe_hash_options_t;
libpostal_near_dupe_hash_options_t libpostal_get_near_dupe_hash_default_options(void);
- return
libpostal_near_dupe_hash_options_t
: #ToDo
char **libpostal_near_dupe_hashes(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t *num_hashes);
- return
char **
: #ToDo -
size_t num_components
: #ToDo -
char **labels
: #ToDo -
char **values
: #ToDo -
libpostal_near_dupe_hash_options_t options
: #ToDo -
size_t *num_hashes
: #ToDo
char **libpostal_near_dupe_hashes_languages(size_t num_components, char **labels, char **values, libpostal_near_dupe_hash_options_t options, size_t num_languages, char **languages, size_t *num_hashes);
- return
char **
: #ToDo -
size_t num_components
: #ToDo -
char **labels
: #ToDo -
char **values
: #ToDo -
libpostal_near_dupe_hash_options_t options
: #ToDo -
size_t num_languages
: #ToDo -
char **languages
: #ToDo -
size_t *num_hashes
: #ToDo
Before using the Address Matching, you must initialize the Language Classifier (see Initialisers).
#ToDo: Write Description
typedef struct libpostal_duplicate_options {
size_t num_languages;
char **languages;
} libpostal_duplicate_options_t;
typedef enum {
LIBPOSTAL_NULL_DUPLICATE_STATUS = -1,
LIBPOSTAL_NON_DUPLICATE = 0,
LIBPOSTAL_POSSIBLE_DUPLICATE_NEEDS_REVIEW = 3,
LIBPOSTAL_LIKELY_DUPLICATE = 6,
LIBPOSTAL_EXACT_DUPLICATE = 9,
} libpostal_duplicate_status_t;
libpostal_duplicate_options_t libpostal_get_default_duplicate_options(void);
- return
libpostal_duplicate_options_t
: An object containing the default options for Address Matching.
libpostal_duplicate_options_t libpostal_get_duplicate_options_with_languages(size_t num_languages, char **languages);
- return
libpostal_duplicate_options_t
: An object containing the default options for Address Matching. -
size_t num_languages
: The number of languages in the languages array. -
char **languages
: The languages to use for Address Matching.
The languages can be fed from the language detection function (libpostal_place_languages).
libpostal_duplicate_status_t libpostal_is_toponym_duplicate(size_t num_components1, char **labels1, char **values1, size_t num_components2, char **labels2, char **values2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: #ToDo -
size_t num_components1
: #ToDo -
char **labels1
: #ToDo -
char **values1
: #ToDo -
size_t num_components2
: #ToDo -
char **labels2
: #ToDo -
char **values2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
This function is intended to be given the results of two separate calls to libpostal_parse_address.
libpostal_duplicate_status_t libpostal_is_name_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two names being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_street_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two streets being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_house_number_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two house numbers being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_po_box_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two PO boxes being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_unit_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two units being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_floor_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two floors being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
libpostal_duplicate_status_t libpostal_is_postal_code_duplicate(char *value1, char *value2, libpostal_duplicate_options_t options);
- return
libpostal_duplicate_status_t
: An enum value classifying the likelihood of the two postal codes being the same. -
char *value1
: #ToDo -
char *value2
: #ToDo -
libpostal_duplicate_options_t options
: #ToDo
#ToDo: Write Description
typedef struct libpostal_fuzzy_duplicate_options {
size_t num_languages;
char **languages;
double needs_review_threshold;
double likely_dupe_threshold;
} libpostal_fuzzy_duplicate_options_t;
typedef struct libpostal_fuzzy_duplicate_status {
libpostal_duplicate_status_t status;
double similarity;
} libpostal_fuzzy_duplicate_status_t;
libpostal_fuzzy_duplicate_options_t libpostal_get_default_fuzzy_duplicate_options(void);
- return
libpostal_fuzzy_duplicate_options_t
: #ToDo
libpostal_fuzzy_duplicate_options_t libpostal_get_default_fuzzy_duplicate_options_with_languages(size_t num_languages, char **languages);
- return
libpostal_fuzzy_duplicate_options_t
: #ToDo -
size_t num_languages
: #ToDo -
char **languages
: #ToDo
libpostal_fuzzy_duplicate_status_t libpostal_is_name_duplicate_fuzzy(size_t num_tokens1, char **tokens1, double *token_scores1, size_t num_tokens2, char **tokens2, double *token_scores2, libpostal_fuzzy_duplicate_options_t options);
- return
libpostal_fuzzy_duplicate_status_t
: #ToDo -
size_t num_tokens1
: #ToDo -
char **tokens1
: #ToDo -
double *token_scores1
: #ToDo -
size_t num_tokens2
: #ToDo -
char **tokens2
: #ToDo -
double *token_scores2
: #ToDo -
libpostal_fuzzy_duplicate_options_t options
: #ToDo
libpostal_fuzzy_duplicate_status_t libpostal_is_street_duplicate_fuzzy(size_t num_tokens1, char **tokens1, double *token_scores1, size_t num_tokens2, char **tokens2, double *token_scores2, libpostal_fuzzy_duplicate_options_t options);
- return
libpostal_fuzzy_duplicate_status_t
: #ToDo -
size_t num_tokens1
: #ToDo -
char **tokens1
: #ToDo -
double *token_scores1
: #ToDo -
size_t num_tokens2
: #ToDo -
char **tokens2
: #ToDo -
double *token_scores2
: #ToDo -
libpostal_fuzzy_duplicate_options_t options
: #ToDo
#ToDo: Write Description
typedef struct libpostal_token {
size_t offset;
size_t len;
uint16_t type;
} libpostal_token_t;
typedef struct libpostal_normalized_token {
char *str;
libpostal_token_t token;
} libpostal_normalized_token_t;
char *libpostal_normalize_string(char *input, uint64_t options);
- return
char *
: #ToDo -
char *input
: #ToDo -
uint64_t options
: #ToDo
char *libpostal_normalize_string_languages(char *input, uint64_t options, size_t num_languages, char **languages);
- return
char *
: #ToDo -
char *input
: #ToDo -
uint64_t options
: #ToDo -
size_t num_languages
: #ToDo -
char **languages
: #ToDo
libpostal_token_t *libpostal_tokenize(char *input, bool whitespace, size_t *n);
- return
libpostal_token_t *
: #ToDo -
char *input
: #ToDo -
bool whitespace
: #ToDo -
size_t *n
: #ToDo
libpostal_normalized_token_t *libpostal_normalized_tokens(char *input, uint64_t string_options, uint64_t token_options, bool whitespace, size_t *n);
- return
libpostal_normalized_token_t *
: #ToDo -
char *input
: #ToDo -
uint64_t string_options
: #ToDo -
uint64_t token_options
: #ToDo -
bool whitespace
: #ToDo -
size_t *n
: Number of tokens in the returned array.
libpostal_normalized_token_t *libpostal_normalized_tokens_languages(char *input, uint64_t string_options, uint64_t token_options, bool whitespace, size_t num_languages, char **languages, size_t *n);
- return
libpostal_normalized_token_t *
: #ToDo -
char *input
: #ToDo -
uint64_t string_options
: #ToDo -
uint64_t token_options
: #ToDo -
bool whitespace
: #ToDo -
size_t num_languages
: Number of strings in the languages array. -
char **languages
: #ToDo -
size_t *n
: Number of tokens in the returned array.
These functions are used for debugging features within LibPostal. They are not needed for normal operation.
bool libpostal_parser_print_features(bool print_features);
- return
bool
: #ToDo -
bool print_features
: This switches the feature on or off.
When switched on, this will cause the LibPostal Address Parser to output debug information to stdout
(the console).